1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CD 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CDBW 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,-avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,-avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ 6 7 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind { 8 ; AVX512CD-LABEL: testv8i64: 9 ; AVX512CD: # %bb.0: 10 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0 11 ; AVX512CD-NEXT: retq 12 ; 13 ; AVX512CDBW-LABEL: testv8i64: 14 ; AVX512CDBW: # %bb.0: 15 ; AVX512CDBW-NEXT: vplzcntq %zmm0, %zmm0 16 ; AVX512CDBW-NEXT: retq 17 ; 18 ; AVX512BW-LABEL: testv8i64: 19 ; AVX512BW: # %bb.0: 20 ; AVX512BW-NEXT: vpsrlq $1, %zmm0, %zmm1 21 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 22 ; AVX512BW-NEXT: vpsrlq $2, %zmm0, %zmm1 23 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 24 ; AVX512BW-NEXT: vpsrlq $4, %zmm0, %zmm1 25 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 26 ; AVX512BW-NEXT: vpsrlq $8, %zmm0, %zmm1 27 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 28 ; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1 29 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 30 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 31 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 32 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 33 ; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 34 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 35 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 36 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 37 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 38 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 39 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 40 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 41 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 42 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 43 ; AVX512BW-NEXT: retq 44 ; 45 ; AVX512DQ-LABEL: testv8i64: 46 ; AVX512DQ: # %bb.0: 47 ; AVX512DQ-NEXT: vpsrlq $1, %zmm0, %zmm1 48 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 49 ; AVX512DQ-NEXT: vpsrlq $2, %zmm0, %zmm1 50 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 51 ; AVX512DQ-NEXT: vpsrlq $4, %zmm0, %zmm1 52 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 53 ; AVX512DQ-NEXT: vpsrlq $8, %zmm0, %zmm1 54 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 55 ; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1 56 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 57 ; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1 58 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 59 ; AVX512DQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 60 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 61 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 62 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm3 63 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 64 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 65 ; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 66 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 67 ; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1 68 ; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1 69 ; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3 70 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 71 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm5 72 ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5 73 ; AVX512DQ-NEXT: vpsrlw $4, 
%ymm0, %ymm0 74 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 75 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm4, %ymm0 76 ; AVX512DQ-NEXT: vpaddb %ymm5, %ymm0, %ymm0 77 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 78 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 79 ; AVX512DQ-NEXT: retq 80 %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0) 81 ret <8 x i64> %out 82 } 83 84 define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind { 85 ; AVX512CD-LABEL: testv8i64u: 86 ; AVX512CD: # %bb.0: 87 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0 88 ; AVX512CD-NEXT: retq 89 ; 90 ; AVX512CDBW-LABEL: testv8i64u: 91 ; AVX512CDBW: # %bb.0: 92 ; AVX512CDBW-NEXT: vplzcntq %zmm0, %zmm0 93 ; AVX512CDBW-NEXT: retq 94 ; 95 ; AVX512BW-LABEL: testv8i64u: 96 ; AVX512BW: # %bb.0: 97 ; AVX512BW-NEXT: vpsrlq $1, %zmm0, %zmm1 98 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 99 ; AVX512BW-NEXT: vpsrlq $2, %zmm0, %zmm1 100 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 101 ; AVX512BW-NEXT: vpsrlq $4, %zmm0, %zmm1 102 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 103 ; AVX512BW-NEXT: vpsrlq $8, %zmm0, %zmm1 104 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 105 ; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1 106 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 107 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 108 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 109 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 110 ; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 111 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 112 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 113 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 114 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 115 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 116 ; AVX512BW-NEXT: 
vpshufb %zmm0, %zmm3, %zmm0 117 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 118 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 119 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 120 ; AVX512BW-NEXT: retq 121 ; 122 ; AVX512DQ-LABEL: testv8i64u: 123 ; AVX512DQ: # %bb.0: 124 ; AVX512DQ-NEXT: vpsrlq $1, %zmm0, %zmm1 125 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 126 ; AVX512DQ-NEXT: vpsrlq $2, %zmm0, %zmm1 127 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 128 ; AVX512DQ-NEXT: vpsrlq $4, %zmm0, %zmm1 129 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 130 ; AVX512DQ-NEXT: vpsrlq $8, %zmm0, %zmm1 131 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 132 ; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1 133 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 134 ; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1 135 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 136 ; AVX512DQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 137 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 138 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 139 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm3 140 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 141 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 142 ; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 143 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 144 ; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1 145 ; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1 146 ; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3 147 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 148 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm5 149 ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5 150 ; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 151 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 152 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm4, %ymm0 153 ; AVX512DQ-NEXT: vpaddb %ymm5, %ymm0, %ymm0 154 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 155 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 156 ; AVX512DQ-NEXT: retq 157 %out = call <8 x 
i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1) 158 ret <8 x i64> %out 159 } 160 161 define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { 162 ; AVX512CD-LABEL: testv16i32: 163 ; AVX512CD: # %bb.0: 164 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 165 ; AVX512CD-NEXT: retq 166 ; 167 ; AVX512CDBW-LABEL: testv16i32: 168 ; AVX512CDBW: # %bb.0: 169 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 170 ; AVX512CDBW-NEXT: retq 171 ; 172 ; AVX512BW-LABEL: testv16i32: 173 ; AVX512BW: # %bb.0: 174 ; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1 175 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 176 ; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1 177 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 178 ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 179 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 180 ; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1 181 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 182 ; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 183 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 184 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 185 ; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 186 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 187 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 188 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 189 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 190 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 191 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 192 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 193 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 194 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 195 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 196 ; AVX512BW-NEXT: 
vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 197 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 198 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 199 ; AVX512BW-NEXT: retq 200 ; 201 ; AVX512DQ-LABEL: testv16i32: 202 ; AVX512DQ: # %bb.0: 203 ; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1 204 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 205 ; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1 206 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 207 ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 208 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 209 ; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1 210 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 211 ; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 212 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 213 ; AVX512DQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 214 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 215 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 216 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm3 217 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 218 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 219 ; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 220 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 221 ; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1 222 ; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1 223 ; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3 224 ; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] 225 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 226 ; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] 227 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 228 ; AVX512DQ-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 229 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm5 230 ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5 231 ; AVX512DQ-NEXT: 
vpsrlw $4, %ymm0, %ymm0 232 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 233 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm4, %ymm0 234 ; AVX512DQ-NEXT: vpaddb %ymm5, %ymm0, %ymm0 235 ; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] 236 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 237 ; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] 238 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 239 ; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 240 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 241 ; AVX512DQ-NEXT: retq 242 %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0) 243 ret <16 x i32> %out 244 } 245 246 define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { 247 ; AVX512CD-LABEL: testv16i32u: 248 ; AVX512CD: # %bb.0: 249 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 250 ; AVX512CD-NEXT: retq 251 ; 252 ; AVX512CDBW-LABEL: testv16i32u: 253 ; AVX512CDBW: # %bb.0: 254 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 255 ; AVX512CDBW-NEXT: retq 256 ; 257 ; AVX512BW-LABEL: testv16i32u: 258 ; AVX512BW: # %bb.0: 259 ; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1 260 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 261 ; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1 262 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 263 ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 264 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 265 ; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1 266 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 267 ; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 268 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 269 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 270 ; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 271 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 272 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 273 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 274 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 275 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 276 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 277 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 278 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 279 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 280 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 281 ; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 282 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 283 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 284 ; AVX512BW-NEXT: retq 285 ; 286 ; AVX512DQ-LABEL: testv16i32u: 287 ; AVX512DQ: # %bb.0: 288 ; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1 289 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 290 ; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1 291 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 292 ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 293 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 294 ; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1 295 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 296 ; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 297 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 298 ; AVX512DQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 299 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 300 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 301 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm3 302 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 303 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 
304 ; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 305 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 306 ; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1 307 ; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1 308 ; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3 309 ; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] 310 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 311 ; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] 312 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 313 ; AVX512DQ-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 314 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm5 315 ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5 316 ; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 317 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 318 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm4, %ymm0 319 ; AVX512DQ-NEXT: vpaddb %ymm5, %ymm0, %ymm0 320 ; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] 321 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 322 ; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] 323 ; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 324 ; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 325 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 326 ; AVX512DQ-NEXT: retq 327 %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1) 328 ret <16 x i32> %out 329 } 330 331 define <32 x i16> @testv32i16(<32 x i16> %in) nounwind { 332 ; AVX512CD-LABEL: testv32i16: 333 ; AVX512CD: # %bb.0: 334 ; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 335 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 336 ; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0 337 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = 
[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 338 ; AVX512CD-NEXT: vpsubw %ymm2, %ymm0, %ymm0 339 ; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 340 ; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1 341 ; AVX512CD-NEXT: vpmovdw %zmm1, %ymm1 342 ; AVX512CD-NEXT: vpsubw %ymm2, %ymm1, %ymm1 343 ; AVX512CD-NEXT: retq 344 ; 345 ; AVX512CDBW-LABEL: testv32i16: 346 ; AVX512CDBW: # %bb.0: 347 ; AVX512CDBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 348 ; AVX512CDBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 349 ; AVX512CDBW-NEXT: vplzcntd %zmm1, %zmm1 350 ; AVX512CDBW-NEXT: vpmovdw %zmm1, %ymm1 351 ; AVX512CDBW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 352 ; AVX512CDBW-NEXT: vpsubw %ymm2, %ymm1, %ymm1 353 ; AVX512CDBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 354 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 355 ; AVX512CDBW-NEXT: vpmovdw %zmm0, %ymm0 356 ; AVX512CDBW-NEXT: vpsubw %ymm2, %ymm0, %ymm0 357 ; AVX512CDBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 358 ; AVX512CDBW-NEXT: retq 359 ; 360 ; AVX512BW-LABEL: testv32i16: 361 ; AVX512BW: # %bb.0: 362 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 363 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 364 ; 
AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] 365 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 366 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm4 367 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm4, %k0 368 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm5 369 ; AVX512BW-NEXT: vpandq %zmm5, %zmm2, %zmm2 370 ; AVX512BW-NEXT: vpandq %zmm1, %zmm4, %zmm1 371 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm3, %zmm1 372 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1 373 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 374 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 375 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 376 ; AVX512BW-NEXT: vpandq %zmm0, %zmm1, %zmm0 377 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 378 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 379 ; AVX512BW-NEXT: retq 380 ; 381 ; AVX512DQ-LABEL: testv32i16: 382 ; AVX512DQ: # %bb.0: 383 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 384 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm3 385 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] 386 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 387 ; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm5 388 ; AVX512DQ-NEXT: vpand %ymm2, %ymm5, %ymm5 389 ; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6 390 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm5, %ymm7 391 ; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3 392 ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5 393 ; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3 394 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm0, %ymm0 395 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 396 ; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm0 397 ; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3 398 ; AVX512DQ-NEXT: vpaddw %ymm0, %ymm3, %ymm0 399 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm3 400 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 401 ; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm5 402 ; AVX512DQ-NEXT: 
vpand %ymm2, %ymm5, %ymm2 403 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm2, %ymm5 404 ; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3 405 ; AVX512DQ-NEXT: vpshufb %ymm2, %ymm4, %ymm2 406 ; AVX512DQ-NEXT: vpaddb %ymm2, %ymm3, %ymm2 407 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm1, %ymm1 408 ; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1 409 ; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1 410 ; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 411 ; AVX512DQ-NEXT: vpaddw %ymm1, %ymm2, %ymm1 412 ; AVX512DQ-NEXT: retq 413 %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0) 414 ret <32 x i16> %out 415 } 416 417 define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind { 418 ; AVX512CD-LABEL: testv32i16u: 419 ; AVX512CD: # %bb.0: 420 ; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 421 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 422 ; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0 423 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 424 ; AVX512CD-NEXT: vpsubw %ymm2, %ymm0, %ymm0 425 ; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 426 ; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1 427 ; AVX512CD-NEXT: vpmovdw %zmm1, %ymm1 428 ; AVX512CD-NEXT: vpsubw %ymm2, %ymm1, %ymm1 429 ; AVX512CD-NEXT: retq 430 ; 431 ; AVX512CDBW-LABEL: testv32i16u: 432 ; AVX512CDBW: # %bb.0: 433 ; AVX512CDBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 434 ; AVX512CDBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 
435 ; AVX512CDBW-NEXT: vplzcntd %zmm1, %zmm1 436 ; AVX512CDBW-NEXT: vpmovdw %zmm1, %ymm1 437 ; AVX512CDBW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 438 ; AVX512CDBW-NEXT: vpsubw %ymm2, %ymm1, %ymm1 439 ; AVX512CDBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 440 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 441 ; AVX512CDBW-NEXT: vpmovdw %zmm0, %ymm0 442 ; AVX512CDBW-NEXT: vpsubw %ymm2, %ymm0, %ymm0 443 ; AVX512CDBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 444 ; AVX512CDBW-NEXT: retq 445 ; 446 ; AVX512BW-LABEL: testv32i16u: 447 ; AVX512BW: # %bb.0: 448 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 449 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 450 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] 451 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 452 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm4 453 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm4, %k0 454 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm5 455 ; AVX512BW-NEXT: vpandq %zmm5, %zmm2, %zmm2 456 ; AVX512BW-NEXT: vpandq %zmm1, %zmm4, %zmm1 457 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm3, %zmm1 458 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1 459 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 460 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 461 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 462 ; AVX512BW-NEXT: vpandq %zmm0, %zmm1, %zmm0 463 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 464 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 465 ; AVX512BW-NEXT: retq 466 ; 467 ; AVX512DQ-LABEL: testv32i16u: 468 ; AVX512DQ: # 
%bb.0: 469 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 470 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm3 471 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] 472 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 473 ; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm5 474 ; AVX512DQ-NEXT: vpand %ymm2, %ymm5, %ymm5 475 ; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6 476 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm5, %ymm7 477 ; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3 478 ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5 479 ; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3 480 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm0, %ymm0 481 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 482 ; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm0 483 ; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3 484 ; AVX512DQ-NEXT: vpaddw %ymm0, %ymm3, %ymm0 485 ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm3 486 ; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3 487 ; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm5 488 ; AVX512DQ-NEXT: vpand %ymm2, %ymm5, %ymm2 489 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm2, %ymm5 490 ; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3 491 ; AVX512DQ-NEXT: vpshufb %ymm2, %ymm4, %ymm2 492 ; AVX512DQ-NEXT: vpaddb %ymm2, %ymm3, %ymm2 493 ; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm1, %ymm1 494 ; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1 495 ; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1 496 ; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 497 ; AVX512DQ-NEXT: vpaddw %ymm1, %ymm2, %ymm1 498 ; AVX512DQ-NEXT: retq 499 %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1) 500 ret <32 x i16> %out 501 } 502 503 define <64 x i8> @testv64i8(<64 x i8> %in) nounwind { 504 ; AVX512CD-LABEL: testv64i8: 505 ; AVX512CD: # %bb.0: 506 ; AVX512CD-NEXT: vextracti128 $1, %ymm0, %xmm2 507 ; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm2 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero 508 ; AVX512CD-NEXT: vplzcntd %zmm2, %zmm2 509 ; AVX512CD-NEXT: vpmovdb %zmm2, %xmm2 510 ; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24] 511 ; AVX512CD-NEXT: vpsubb %xmm3, %xmm2, %xmm2 512 ; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 513 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 514 ; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0 515 ; AVX512CD-NEXT: vpsubb %xmm3, %xmm0, %xmm0 516 ; AVX512CD-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 517 ; AVX512CD-NEXT: vextracti128 $1, %ymm1, %xmm2 518 ; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero 519 ; AVX512CD-NEXT: vplzcntd %zmm2, %zmm2 520 ; AVX512CD-NEXT: vpmovdb %zmm2, %xmm2 521 ; AVX512CD-NEXT: vpsubb %xmm3, %xmm2, %xmm2 522 ; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 523 ; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1 524 ; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1 525 ; AVX512CD-NEXT: vpsubb %xmm3, %xmm1, %xmm1 526 ; AVX512CD-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 527 ; AVX512CD-NEXT: retq 528 ; 529 ; AVX512CDBW-LABEL: testv64i8: 530 ; AVX512CDBW: # %bb.0: 531 ; AVX512CDBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 532 ; AVX512CDBW-NEXT: vextracti128 $1, %ymm1, %xmm2 533 ; AVX512CDBW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero 534 ; AVX512CDBW-NEXT: vplzcntd %zmm2, %zmm2 535 ; AVX512CDBW-NEXT: vpmovdb %zmm2, %xmm2 536 ; AVX512CDBW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24] 537 ; AVX512CDBW-NEXT: vpsubb %xmm3, %xmm2, %xmm2 538 ; AVX512CDBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 539 ; AVX512CDBW-NEXT: vplzcntd %zmm1, %zmm1 540 ; AVX512CDBW-NEXT: vpmovdb %zmm1, %xmm1 541 ; AVX512CDBW-NEXT: vpsubb %xmm3, %xmm1, %xmm1 542 ; 
AVX512CDBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 543 ; AVX512CDBW-NEXT: vextracti128 $1, %ymm0, %xmm2 544 ; AVX512CDBW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero 545 ; AVX512CDBW-NEXT: vplzcntd %zmm2, %zmm2 546 ; AVX512CDBW-NEXT: vpmovdb %zmm2, %xmm2 547 ; AVX512CDBW-NEXT: vpsubb %xmm3, %xmm2, %xmm2 548 ; AVX512CDBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 549 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 550 ; AVX512CDBW-NEXT: vpmovdb %zmm0, %xmm0 551 ; AVX512CDBW-NEXT: vpsubb %xmm3, %xmm0, %xmm0 552 ; AVX512CDBW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 553 ; AVX512CDBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 554 ; AVX512CDBW-NEXT: retq 555 ; 556 ; AVX512BW-LABEL: testv64i8: 557 ; AVX512BW: # %bb.0: 558 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 559 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 560 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] 561 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 562 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, 
%zmm0
; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm4
; AVX512BW-NEXT:    vpandq %zmm4, %zmm2, %zmm2
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: testv64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm6
; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT:    retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
  ret <64 x i8> %out
}

; Count leading zeros of each i8 lane of a <64 x i8> vector, calling
; @llvm.ctlz.v64i8 with its i1 flag set to -1 (i.e. true); @testv64i8 above
; issues the same call with the flag set to 0. Per the LLVM LangRef, a true flag
; means the result for a zero input lane is not defined.
; The check lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file):
; regenerate them with that script instead of editing them by hand.
define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8u:
; AVX512CD:       # %bb.0:
; AVX512CD-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512CD-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512CD-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512CD-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv64i8u:
; AVX512CDBW:       # %bb.0:
; AVX512CDBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512CDBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CDBW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512CDBW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512CDBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512CDBW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; AVX512CDBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512CDBW-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CDBW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512CDBW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX512CDBW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512CDBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8u:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm4
; AVX512BW-NEXT:    vpandq %zmm4, %zmm2, %zmm2
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: testv64i8u:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm6
; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT:    retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
  ret <64 x i8> %out
}

; Declarations of the ctlz intrinsic overloads, one per 512-bit vector type
; (<8 x i64>, <16 x i32>, <32 x i16>, <64 x i8>).
declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)