Lines Matching full:next
10 ; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
11 ; AVX512CD-NEXT: vpsubq %zmm0, %zmm1, %zmm1
12 ; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
13 ; AVX512CD-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
14 ; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
15 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm3
17 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18 ; AVX512CD-NEXT: vpshufb %ymm3, %ymm4, %ymm3
19 ; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
20 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
21 ; AVX512CD-NEXT: vpshufb %ymm1, %ymm4, %ymm1
22 ; AVX512CD-NEXT: vpaddb %ymm3, %ymm1, %ymm1
23 ; AVX512CD-NEXT: vpxor %ymm3, %ymm3, %ymm3
24 ; AVX512CD-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
25 ; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm5
26 ; AVX512CD-NEXT: vpshufb %ymm5, %ymm4, %ymm5
27 ; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
28 ; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm0
29 ; AVX512CD-NEXT: vpshufb %ymm0, %ymm4, %ymm0
30 ; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
31 ; AVX512CD-NEXT: vpsadbw %ymm3, %ymm0, %ymm0
32 ; AVX512CD-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
33 ; AVX512CD-NEXT: retq
37 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
38 ; AVX512CDBW-NEXT: vpsubq %zmm0, %zmm1, %zmm2
39 ; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
40 ; AVX512CDBW-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
41 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
42 ; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm3
43 ; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
44 ; AVX512CDBW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
45 ; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
46 ; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
47 ; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
48 ; AVX512CDBW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
49 ; AVX512CDBW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
50 ; AVX512CDBW-NEXT: retq
54 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
55 ; AVX512BW-NEXT: vpsubq %zmm0, %zmm1, %zmm2
56 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
57 ; AVX512BW-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
58 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
59 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
60 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
61 ; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
62 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
63 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
64 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
65 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
66 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
67 ; AVX512BW-NEXT: retq
75 ; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
76 ; AVX512CD-NEXT: vpsubq %zmm0, %zmm1, %zmm1
77 ; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
78 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
79 ; AVX512CD-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
80 ; AVX512CD-NEXT: vpsubq %zmm0, %zmm1, %zmm0
81 ; AVX512CD-NEXT: retq
85 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
86 ; AVX512CDBW-NEXT: vpsubq %zmm0, %zmm1, %zmm1
87 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
88 ; AVX512CDBW-NEXT: vplzcntq %zmm0, %zmm0
89 ; AVX512CDBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
90 ; AVX512CDBW-NEXT: vpsubq %zmm0, %zmm1, %zmm0
91 ; AVX512CDBW-NEXT: retq
95 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
96 ; AVX512BW-NEXT: vpsubq %zmm0, %zmm1, %zmm2
97 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
98 ; AVX512BW-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
99 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
100 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
101 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
102 ; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
103 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
104 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
105 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
106 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
107 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
108 ; AVX512BW-NEXT: retq
116 ; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
117 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
118 ; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
119 ; AVX512CD-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
120 ; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
121 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
122 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm3
123 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
124 ; AVX512CD-NEXT: vpshufb %ymm3, %ymm4, %ymm3
125 ; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
126 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
127 ; AVX512CD-NEXT: vpshufb %ymm1, %ymm4, %ymm1
128 ; AVX512CD-NEXT: vpaddb %ymm3, %ymm1, %ymm1
129 ; AVX512CD-NEXT: vpxor %ymm3, %ymm3, %ymm3
130 ; AVX512CD-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
131 ; AVX512CD-NEXT: vpsadbw %ymm3, %ymm5, %ymm5
132 ; AVX512CD-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
133 ; AVX512CD-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
134 ; AVX512CD-NEXT: vpackuswb %ymm5, %ymm1, %ymm1
135 ; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm5
136 ; AVX512CD-NEXT: vpshufb %ymm5, %ymm4, %ymm5
137 ; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
138 ; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm0
139 ; AVX512CD-NEXT: vpshufb %ymm0, %ymm4, %ymm0
140 ; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
141 ; AVX512CD-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
142 ; AVX512CD-NEXT: vpsadbw %ymm3, %ymm2, %ymm2
143 ; AVX512CD-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
144 ; AVX512CD-NEXT: vpsadbw %ymm3, %ymm0, %ymm0
145 ; AVX512CD-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
146 ; AVX512CD-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
147 ; AVX512CD-NEXT: retq
151 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
152 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
153 ; AVX512CDBW-NEXT: vpandd %zmm2, %zmm0, %zmm0
154 ; AVX512CDBW-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
155 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
156 ; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm3
157 ; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
158 ; AVX512CDBW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
159 ; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
160 ; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
161 ; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
162 ; AVX512CDBW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
163 ; AVX512CDBW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
164 ; AVX512CDBW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
165 ; AVX512CDBW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
166 ; AVX512CDBW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
167 ; AVX512CDBW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
168 ; AVX512CDBW-NEXT: retq
172 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
173 ; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
174 ; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
175 ; AVX512BW-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
176 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
177 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
178 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
179 ; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
180 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
181 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
182 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
183 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
184 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
185 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
186 ; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
187 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
188 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
189 ; AVX512BW-NEXT: retq
197 ; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
198 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
199 ; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
200 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
201 ; AVX512CD-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
202 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
203 ; AVX512CD-NEXT: retq
207 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
208 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1
209 ; AVX512CDBW-NEXT: vpandd %zmm1, %zmm0, %zmm0
210 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
211 ; AVX512CDBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
212 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
213 ; AVX512CDBW-NEXT: retq
217 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
218 ; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
219 ; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
220 ; AVX512BW-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
221 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
222 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
223 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
224 ; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
225 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
226 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
227 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
228 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
229 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
230 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
231 ; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
232 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
233 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
234 ; AVX512BW-NEXT: retq
242 ; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
243 ; AVX512CD-NEXT: vpsubw %ymm0, %ymm2, %ymm3
244 ; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
245 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
246 ; AVX512CD-NEXT: vpsubw %ymm3, %ymm0, %ymm0
247 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
248 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
249 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
250 ; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
251 ; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
252 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
253 ; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
254 ; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
255 ; AVX512CD-NEXT: vpsllw $8, %ymm0, %ymm5
256 ; AVX512CD-NEXT: vpaddb %ymm0, %ymm5, %ymm0
257 ; AVX512CD-NEXT: vpsrlw $8, %ymm0, %ymm0
258 ; AVX512CD-NEXT: vpsubw %ymm1, %ymm2, %ymm2
259 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
260 ; AVX512CD-NEXT: vpsubw %ymm3, %ymm1, %ymm1
261 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
262 ; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
263 ; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
264 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
265 ; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
266 ; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
267 ; AVX512CD-NEXT: vpsllw $8, %ymm1, %ymm2
268 ; AVX512CD-NEXT: vpaddb %ymm1, %ymm2, %ymm1
269 ; AVX512CD-NEXT: vpsrlw $8, %ymm1, %ymm1
270 ; AVX512CD-NEXT: retq
274 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
275 ; AVX512CDBW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
276 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
277 ; AVX512CDBW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
278 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
279 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
280 ; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
281 ; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
282 ; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
283 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
284 ; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
285 ; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
286 ; AVX512CDBW-NEXT: vpsllw $8, %zmm0, %zmm1
287 ; AVX512CDBW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
288 ; AVX512CDBW-NEXT: vpsrlw $8, %zmm0, %zmm0
289 ; AVX512CDBW-NEXT: retq
293 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
294 ; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
295 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
296 ; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
297 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
298 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
299 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
300 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
301 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
302 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
303 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
304 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
305 ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1
306 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
307 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
308 ; AVX512BW-NEXT: retq
316 ; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
317 ; AVX512CD-NEXT: vpsubw %ymm0, %ymm2, %ymm3
318 ; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
319 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
320 ; AVX512CD-NEXT: vpsubw %ymm3, %ymm0, %ymm0
321 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
322 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
323 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
324 ; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
325 ; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
326 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
327 ; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
328 ; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
329 ; AVX512CD-NEXT: vpsllw $8, %ymm0, %ymm5
330 ; AVX512CD-NEXT: vpaddb %ymm0, %ymm5, %ymm0
331 ; AVX512CD-NEXT: vpsrlw $8, %ymm0, %ymm0
332 ; AVX512CD-NEXT: vpsubw %ymm1, %ymm2, %ymm2
333 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
334 ; AVX512CD-NEXT: vpsubw %ymm3, %ymm1, %ymm1
335 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
336 ; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
337 ; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
338 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
339 ; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
340 ; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
341 ; AVX512CD-NEXT: vpsllw $8, %ymm1, %ymm2
342 ; AVX512CD-NEXT: vpaddb %ymm1, %ymm2, %ymm1
343 ; AVX512CD-NEXT: vpsrlw $8, %ymm1, %ymm1
344 ; AVX512CD-NEXT: retq
348 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
349 ; AVX512CDBW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
350 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
351 ; AVX512CDBW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
352 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
353 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
354 ; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
355 ; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
356 ; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
357 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
358 ; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
359 ; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
360 ; AVX512CDBW-NEXT: vpsllw $8, %zmm0, %zmm1
361 ; AVX512CDBW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
362 ; AVX512CDBW-NEXT: vpsrlw $8, %zmm0, %zmm0
363 ; AVX512CDBW-NEXT: retq
367 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
368 ; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
369 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
370 ; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
371 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
372 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
373 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
374 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
375 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
376 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
377 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
378 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
379 ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1
380 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
381 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
382 ; AVX512BW-NEXT: retq
390 ; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
391 ; AVX512CD-NEXT: vpsubb %ymm0, %ymm2, %ymm3
392 ; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
393 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
394 ; AVX512CD-NEXT: vpsubb %ymm3, %ymm0, %ymm0
395 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
396 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
397 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
398 ; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
399 ; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
400 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
401 ; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
402 ; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
403 ; AVX512CD-NEXT: vpsubb %ymm1, %ymm2, %ymm2
404 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
405 ; AVX512CD-NEXT: vpsubb %ymm3, %ymm1, %ymm1
406 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
407 ; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
408 ; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
409 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
410 ; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
411 ; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
412 ; AVX512CD-NEXT: retq
416 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
417 ; AVX512CDBW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
418 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
419 ; AVX512CDBW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
420 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
421 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
422 ; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
423 ; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
424 ; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
425 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
426 ; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
427 ; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
428 ; AVX512CDBW-NEXT: retq
432 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
433 ; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
434 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
435 ; AVX512BW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
436 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
437 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
438 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
439 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
440 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
441 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
442 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
443 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
444 ; AVX512BW-NEXT: retq
452 ; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
453 ; AVX512CD-NEXT: vpsubb %ymm0, %ymm2, %ymm3
454 ; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
455 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
456 ; AVX512CD-NEXT: vpsubb %ymm3, %ymm0, %ymm0
457 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
458 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
459 ; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
460 ; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
461 ; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
462 ; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
463 ; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
464 ; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
465 ; AVX512CD-NEXT: vpsubb %ymm1, %ymm2, %ymm2
466 ; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
467 ; AVX512CD-NEXT: vpsubb %ymm3, %ymm1, %ymm1
468 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
469 ; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
470 ; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
471 ; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
472 ; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
473 ; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
474 ; AVX512CD-NEXT: retq
478 ; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
479 ; AVX512CDBW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
480 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
481 ; AVX512CDBW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
482 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
483 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
484 ; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
485 ; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
486 ; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
487 ; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
488 ; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
489 ; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
490 ; AVX512CDBW-NEXT: retq
494 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
495 ; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
496 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
497 ; AVX512BW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
498 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
499 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
500 ; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
501 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
502 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
503 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
504 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
505 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
506 ; AVX512BW-NEXT: retq