Home | History | Annotate | Download | only in X86

Lines Matching full:next

8 ; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
9 ; AVX512BW-NEXT: kmovq %k0, %rax
10 ; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0
11 ; AVX512BW-NEXT: kmovq %k0, %rcx
12 ; AVX512BW-NEXT: addq %rax, %rcx
13 ; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0
14 ; AVX512BW-NEXT: kmovq %k0, %rax
15 ; AVX512BW-NEXT: addq %rcx, %rax
16 ; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
17 ; AVX512BW-NEXT: kmovq %k0, %rcx
18 ; AVX512BW-NEXT: addq %rax, %rcx
19 ; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
20 ; AVX512BW-NEXT: kmovq %k0, %rax
21 ; AVX512BW-NEXT: addq %rcx, %rax
22 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
23 ; AVX512BW-NEXT: kmovq %k0, %rcx
24 ; AVX512BW-NEXT: addq %rax, %rcx
25 ; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
26 ; AVX512BW-NEXT: kmovq %k0, %rdx
27 ; AVX512BW-NEXT: addq %rcx, %rdx
28 ; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0
29 ; AVX512BW-NEXT: kmovq %k0, %rax
30 ; AVX512BW-NEXT: addq %rdx, %rax
31 ; AVX512BW-NEXT: retq
35 ; AVX512F-32-NEXT: subl $68, %esp
36 ; AVX512F-32-NEXT: .Ltmp0:
37 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
38 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
39 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
40 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
41 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
42 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
43 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
44 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
45 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
46 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
47 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
48 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
49 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
50 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
51 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
52 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
53 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
54 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
55 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
56 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
57 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
58 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
59 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
60 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
61 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
62 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
63 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
64 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
65 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
66 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
67 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
68 ; AVX512F-32-NEXT: addl (%esp), %eax
69 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
70 ; AVX512F-32-NEXT: addl $68, %esp
71 ; AVX512F-32-NEXT: retl
93 ; AVX512BW-NEXT: kmovq %rdi, %k1
94 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
95 ; AVX512BW-NEXT: kmovq %k0, %rax
96 ; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
97 ; AVX512BW-NEXT: kmovq %k0, %rcx
98 ; AVX512BW-NEXT: addq %rax, %rcx
99 ; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
100 ; AVX512BW-NEXT: kmovq %k0, %rax
101 ; AVX512BW-NEXT: addq %rcx, %rax
102 ; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
103 ; AVX512BW-NEXT: kmovq %k0, %rcx
104 ; AVX512BW-NEXT: addq %rax, %rcx
105 ; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
106 ; AVX512BW-NEXT: kmovq %k0, %rax
107 ; AVX512BW-NEXT: addq %rcx, %rax
108 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
109 ; AVX512BW-NEXT: kmovq %k0, %rcx
110 ; AVX512BW-NEXT: addq %rax, %rcx
111 ; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
112 ; AVX512BW-NEXT: kmovq %k0, %rdx
113 ; AVX512BW-NEXT: addq %rcx, %rdx
114 ; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
115 ; AVX512BW-NEXT: kmovq %k0, %rax
116 ; AVX512BW-NEXT: addq %rdx, %rax
117 ; AVX512BW-NEXT: retq
121 ; AVX512F-32-NEXT: subl $68, %esp
122 ; AVX512F-32-NEXT: .Ltmp1:
123 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
124 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
125 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
126 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
127 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
128 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
129 ; AVX512F-32-NEXT: movl (%esp), %eax
130 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
131 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
132 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
133 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
134 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
135 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
136 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
137 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
138 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
139 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
140 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
141 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
142 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
143 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
144 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
145 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
146 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
147 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
148 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
149 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
150 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
151 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
152 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
153 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
154 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
155 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
156 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
157 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
158 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
159 ; AVX512F-32-NEXT: addl $68, %esp
160 ; AVX512F-32-NEXT: retl
184 ; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0
185 ; AVX512BW-NEXT: kmovq %k0, %rax
186 ; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0
187 ; AVX512BW-NEXT: kmovq %k0, %rcx
188 ; AVX512BW-NEXT: addq %rax, %rcx
189 ; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0
190 ; AVX512BW-NEXT: kmovq %k0, %rax
191 ; AVX512BW-NEXT: addq %rcx, %rax
192 ; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
193 ; AVX512BW-NEXT: kmovq %k0, %rcx
194 ; AVX512BW-NEXT: addq %rax, %rcx
195 ; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
196 ; AVX512BW-NEXT: kmovq %k0, %rax
197 ; AVX512BW-NEXT: addq %rcx, %rax
198 ; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
199 ; AVX512BW-NEXT: kmovq %k0, %rcx
200 ; AVX512BW-NEXT: addq %rax, %rcx
201 ; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
202 ; AVX512BW-NEXT: kmovq %k0, %rdx
203 ; AVX512BW-NEXT: addq %rcx, %rdx
204 ; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0
205 ; AVX512BW-NEXT: kmovq %k0, %rax
206 ; AVX512BW-NEXT: addq %rdx, %rax
207 ; AVX512BW-NEXT: retq
211 ; AVX512F-32-NEXT: subl $68, %esp
212 ; AVX512F-32-NEXT: .Ltmp2:
213 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
214 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
215 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
216 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
217 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
218 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
219 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
220 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
221 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
222 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
223 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
224 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
225 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
226 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
227 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
228 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
229 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
230 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
231 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
232 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
233 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
234 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
235 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
236 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
237 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
238 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
239 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
240 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
241 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
242 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
243 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
244 ; AVX512F-32-NEXT: addl (%esp), %eax
245 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
246 ; AVX512F-32-NEXT: addl $68, %esp
247 ; AVX512F-32-NEXT: retl
269 ; AVX512BW-NEXT: kmovq %rdi, %k1
270 ; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
271 ; AVX512BW-NEXT: kmovq %k0, %rax
272 ; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
273 ; AVX512BW-NEXT: kmovq %k0, %rcx
274 ; AVX512BW-NEXT: addq %rax, %rcx
275 ; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
276 ; AVX512BW-NEXT: kmovq %k0, %rax
277 ; AVX512BW-NEXT: addq %rcx, %rax
278 ; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
279 ; AVX512BW-NEXT: kmovq %k0, %rcx
280 ; AVX512BW-NEXT: addq %rax, %rcx
281 ; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
282 ; AVX512BW-NEXT: kmovq %k0, %rax
283 ; AVX512BW-NEXT: addq %rcx, %rax
284 ; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
285 ; AVX512BW-NEXT: kmovq %k0, %rcx
286 ; AVX512BW-NEXT: addq %rax, %rcx
287 ; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
288 ; AVX512BW-NEXT: kmovq %k0, %rdx
289 ; AVX512BW-NEXT: addq %rcx, %rdx
290 ; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
291 ; AVX512BW-NEXT: kmovq %k0, %rax
292 ; AVX512BW-NEXT: addq %rdx, %rax
293 ; AVX512BW-NEXT: retq
297 ; AVX512F-32-NEXT: subl $68, %esp
298 ; AVX512F-32-NEXT: .Ltmp3:
299 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
300 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
301 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
302 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
303 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
304 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
305 ; AVX512F-32-NEXT: movl (%esp), %eax
306 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
307 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
308 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
309 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
310 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
311 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
312 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
313 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
314 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
315 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
316 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
317 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
318 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
319 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
320 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
321 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
322 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
323 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
324 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
325 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
326 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
327 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
328 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
329 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
330 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
331 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
332 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
333 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
334 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
335 ; AVX512F-32-NEXT: addl $68, %esp
336 ; AVX512F-32-NEXT: retl
360 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
361 ; AVX512BW-NEXT: kmovd %k0, %eax
362 ; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0
363 ; AVX512BW-NEXT: kmovd %k0, %ecx
364 ; AVX512BW-NEXT: addl %eax, %ecx
365 ; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0
366 ; AVX512BW-NEXT: kmovd %k0, %eax
367 ; AVX512BW-NEXT: addl %ecx, %eax
368 ; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
369 ; AVX512BW-NEXT: kmovd %k0, %ecx
370 ; AVX512BW-NEXT: addl %eax, %ecx
371 ; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
372 ; AVX512BW-NEXT: kmovd %k0, %eax
373 ; AVX512BW-NEXT: addl %ecx, %eax
374 ; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
375 ; AVX512BW-NEXT: kmovd %k0, %ecx
376 ; AVX512BW-NEXT: addl %eax, %ecx
377 ; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
378 ; AVX512BW-NEXT: kmovd %k0, %edx
379 ; AVX512BW-NEXT: addl %ecx, %edx
380 ; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0
381 ; AVX512BW-NEXT: kmovd %k0, %eax
382 ; AVX512BW-NEXT: addl %edx, %eax
383 ; AVX512BW-NEXT: retq
387 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
388 ; AVX512F-32-NEXT: kmovd %k0, %eax
389 ; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0
390 ; AVX512F-32-NEXT: kmovd %k0, %ecx
391 ; AVX512F-32-NEXT: addl %eax, %ecx
392 ; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0
393 ; AVX512F-32-NEXT: kmovd %k0, %eax
394 ; AVX512F-32-NEXT: addl %ecx, %eax
395 ; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
396 ; AVX512F-32-NEXT: kmovd %k0, %ecx
397 ; AVX512F-32-NEXT: addl %eax, %ecx
398 ; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
399 ; AVX512F-32-NEXT: kmovd %k0, %eax
400 ; AVX512F-32-NEXT: addl %ecx, %eax
401 ; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
402 ; AVX512F-32-NEXT: kmovd %k0, %ecx
403 ; AVX512F-32-NEXT: addl %eax, %ecx
404 ; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
405 ; AVX512F-32-NEXT: kmovd %k0, %edx
406 ; AVX512F-32-NEXT: addl %ecx, %edx
407 ; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0
408 ; AVX512F-32-NEXT: kmovd %k0, %eax
409 ; AVX512F-32-NEXT: addl %edx, %eax
410 ; AVX512F-32-NEXT: retl
432 ; AVX512BW-NEXT: kmovd %edi, %k1
433 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
434 ; AVX512BW-NEXT: kmovd %k0, %eax
435 ; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
436 ; AVX512BW-NEXT: kmovd %k0, %ecx
437 ; AVX512BW-NEXT: addl %eax, %ecx
438 ; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
439 ; AVX512BW-NEXT: kmovd %k0, %eax
440 ; AVX512BW-NEXT: addl %ecx, %eax
441 ; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
442 ; AVX512BW-NEXT: kmovd %k0, %ecx
443 ; AVX512BW-NEXT: addl %eax, %ecx
444 ; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
445 ; AVX512BW-NEXT: kmovd %k0, %eax
446 ; AVX512BW-NEXT: addl %ecx, %eax
447 ; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
448 ; AVX512BW-NEXT: kmovd %k0, %ecx
449 ; AVX512BW-NEXT: addl %eax, %ecx
450 ; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
451 ; AVX512BW-NEXT: kmovd %k0, %edx
452 ; AVX512BW-NEXT: addl %ecx, %edx
453 ; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
454 ; AVX512BW-NEXT: kmovd %k0, %eax
455 ; AVX512BW-NEXT: addl %edx, %eax
456 ; AVX512BW-NEXT: retq
460 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
461 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
462 ; AVX512F-32-NEXT: kmovd %k0, %eax
463 ; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
464 ; AVX512F-32-NEXT: kmovd %k0, %ecx
465 ; AVX512F-32-NEXT: addl %eax, %ecx
466 ; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
467 ; AVX512F-32-NEXT: kmovd %k0, %eax
468 ; AVX512F-32-NEXT: addl %ecx, %eax
469 ; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
470 ; AVX512F-32-NEXT: kmovd %k0, %ecx
471 ; AVX512F-32-NEXT: addl %eax, %ecx
472 ; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
473 ; AVX512F-32-NEXT: kmovd %k0, %eax
474 ; AVX512F-32-NEXT: addl %ecx, %eax
475 ; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
476 ; AVX512F-32-NEXT: kmovd %k0, %ecx
477 ; AVX512F-32-NEXT: addl %eax, %ecx
478 ; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
479 ; AVX512F-32-NEXT: kmovd %k0, %edx
480 ; AVX512F-32-NEXT: addl %ecx, %edx
481 ; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
482 ; AVX512F-32-NEXT: kmovd %k0, %eax
483 ; AVX512F-32-NEXT: addl %edx, %eax
484 ; AVX512F-32-NEXT: retl
508 ; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0
509 ; AVX512BW-NEXT: kmovd %k0, %eax
510 ; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
511 ; AVX512BW-NEXT: kmovd %k0, %ecx
512 ; AVX512BW-NEXT: addl %eax, %ecx
513 ; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
514 ; AVX512BW-NEXT: kmovd %k0, %eax
515 ; AVX512BW-NEXT: addl %ecx, %eax
516 ; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
517 ; AVX512BW-NEXT: kmovd %k0, %ecx
518 ; AVX512BW-NEXT: addl %eax, %ecx
519 ; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
520 ; AVX512BW-NEXT: kmovd %k0, %eax
521 ; AVX512BW-NEXT: addl %ecx, %eax
522 ; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
523 ; AVX512BW-NEXT: kmovd %k0, %ecx
524 ; AVX512BW-NEXT: addl %eax, %ecx
525 ; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
526 ; AVX512BW-NEXT: kmovd %k0, %edx
527 ; AVX512BW-NEXT: addl %ecx, %edx
528 ; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0
529 ; AVX512BW-NEXT: kmovd %k0, %eax
530 ; AVX512BW-NEXT: addl %edx, %eax
531 ; AVX512BW-NEXT: retq
535 ; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0
536 ; AVX512F-32-NEXT: kmovd %k0, %eax
537 ; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
538 ; AVX512F-32-NEXT: kmovd %k0, %ecx
539 ; AVX512F-32-NEXT: addl %eax, %ecx
540 ; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
541 ; AVX512F-32-NEXT: kmovd %k0, %eax
542 ; AVX512F-32-NEXT: addl %ecx, %eax
543 ; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
544 ; AVX512F-32-NEXT: kmovd %k0, %ecx
545 ; AVX512F-32-NEXT: addl %eax, %ecx
546 ; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
547 ; AVX512F-32-NEXT: kmovd %k0, %eax
548 ; AVX512F-32-NEXT: addl %ecx, %eax
549 ; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
550 ; AVX512F-32-NEXT: kmovd %k0, %ecx
551 ; AVX512F-32-NEXT: addl %eax, %ecx
552 ; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
553 ; AVX512F-32-NEXT: kmovd %k0, %edx
554 ; AVX512F-32-NEXT: addl %ecx, %edx
555 ; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0
556 ; AVX512F-32-NEXT: kmovd %k0, %eax
557 ; AVX512F-32-NEXT: addl %edx, %eax
558 ; AVX512F-32-NEXT: retl
580 ; AVX512BW-NEXT: kmovd %edi, %k1
581 ; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
582 ; AVX512BW-NEXT: kmovd %k0, %eax
583 ; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
584 ; AVX512BW-NEXT: kmovd %k0, %ecx
585 ; AVX512BW-NEXT: addl %eax, %ecx
586 ; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
587 ; AVX512BW-NEXT: kmovd %k0, %eax
588 ; AVX512BW-NEXT: addl %ecx, %eax
589 ; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
590 ; AVX512BW-NEXT: kmovd %k0, %ecx
591 ; AVX512BW-NEXT: addl %eax, %ecx
592 ; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
593 ; AVX512BW-NEXT: kmovd %k0, %eax
594 ; AVX512BW-NEXT: addl %ecx, %eax
595 ; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
596 ; AVX512BW-NEXT: kmovd %k0, %ecx
597 ; AVX512BW-NEXT: addl %eax, %ecx
598 ; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
599 ; AVX512BW-NEXT: kmovd %k0, %edx
600 ; AVX512BW-NEXT: addl %ecx, %edx
601 ; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
602 ; AVX512BW-NEXT: kmovd %k0, %eax
603 ; AVX512BW-NEXT: addl %edx, %eax
604 ; AVX512BW-NEXT: retq
608 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
609 ; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
610 ; AVX512F-32-NEXT: kmovd %k0, %eax
611 ; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
612 ; AVX512F-32-NEXT: kmovd %k0, %ecx
613 ; AVX512F-32-NEXT: addl %eax, %ecx
614 ; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
615 ; AVX512F-32-NEXT: kmovd %k0, %eax
616 ; AVX512F-32-NEXT: addl %ecx, %eax
617 ; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
618 ; AVX512F-32-NEXT: kmovd %k0, %ecx
619 ; AVX512F-32-NEXT: addl %eax, %ecx
620 ; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
621 ; AVX512F-32-NEXT: kmovd %k0, %eax
622 ; AVX512F-32-NEXT: addl %ecx, %eax
623 ; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
624 ; AVX512F-32-NEXT: kmovd %k0, %ecx
625 ; AVX512F-32-NEXT: addl %eax, %ecx
626 ; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
627 ; AVX512F-32-NEXT: kmovd %k0, %edx
628 ; AVX512F-32-NEXT: addl %ecx, %edx
629 ; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
630 ; AVX512F-32-NEXT: kmovd %k0, %eax
631 ; AVX512F-32-NEXT: addl %edx, %eax
632 ; AVX512F-32-NEXT: retl
656 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
657 ; AVX512BW-NEXT: retq
661 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
662 ; AVX512F-32-NEXT: retl
670 ; AVX512BW-NEXT: kmovd %edi, %k1
671 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
672 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
673 ; AVX512BW-NEXT: retq
677 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
678 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
679 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
680 ; AVX512F-32-NEXT: retl
688 ; AVX512BW-NEXT: kmovd %edi, %k1
689 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
690 ; AVX512BW-NEXT: retq
694 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
695 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
696 ; AVX512F-32-NEXT: retl
704 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0
705 ; AVX512BW-NEXT: retq
709 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
710 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0
711 ; AVX512F-32-NEXT: retl
720 ; AVX512BW-NEXT: kmovd %esi, %k1
721 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
722 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
723 ; AVX512BW-NEXT: retq
727 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
728 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
729 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1}
730 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
731 ; AVX512F-32-NEXT: retl
740 ; AVX512BW-NEXT: kmovd %esi, %k1
741 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
742 ; AVX512BW-NEXT: retq
746 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
747 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
748 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
749 ; AVX512F-32-NEXT: retl
758 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0
759 ; AVX512BW-NEXT: retq
763 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
764 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0
765 ; AVX512F-32-NEXT: retl
776 ; AVX512BW-NEXT: kmovd %esi, %k1
777 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
778 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
779 ; AVX512BW-NEXT: retq
783 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
784 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
785 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
786 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
787 ; AVX512F-32-NEXT: retl
798 ; AVX512BW-NEXT: kmovd %esi, %k1
799 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
800 ; AVX512BW-NEXT: retq
804 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
805 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
806 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
807 ; AVX512F-32-NEXT: retl
820 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
821 ; AVX512BW-NEXT: retq
825 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
826 ; AVX512F-32-NEXT: retl
834 ; AVX512BW-NEXT: kmovq %rdi, %k1
835 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
836 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
837 ; AVX512BW-NEXT: retq
841 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
842 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
843 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
844 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
845 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
846 ; AVX512F-32-NEXT: retl
854 ; AVX512BW-NEXT: kmovq %rdi, %k1
855 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
856 ; AVX512BW-NEXT: retq
860 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
861 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
862 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
863 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
864 ; AVX512F-32-NEXT: retl
872 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0
873 ; AVX512BW-NEXT: retq
877 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
878 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0
879 ; AVX512F-32-NEXT: retl
888 ; AVX512BW-NEXT: kmovq %rsi, %k1
889 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
890 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
891 ; AVX512BW-NEXT: retq
895 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
896 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
897 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
898 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
899 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
900 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
901 ; AVX512F-32-NEXT: retl
910 ; AVX512BW-NEXT: kmovq %rsi, %k1
911 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
912 ; AVX512BW-NEXT: retq
916 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
917 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
918 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
919 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
920 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
921 ; AVX512F-32-NEXT: retl
933 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
934 ; AVX512BW-NEXT: retq
938 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
939 ; AVX512F-32-NEXT: retl
947 ; AVX512BW-NEXT: kmovd %edi, %k1
948 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
949 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
950 ; AVX512BW-NEXT: retq
954 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
955 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
956 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
957 ; AVX512F-32-NEXT: retl
965 ; AVX512BW-NEXT: kmovd %edi, %k1
966 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
967 ; AVX512BW-NEXT: retq
971 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
972 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
973 ; AVX512F-32-NEXT: retl
981 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0
982 ; AVX512BW-NEXT: retq
986 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
987 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0
988 ; AVX512F-32-NEXT: retl
997 ; AVX512BW-NEXT: kmovd %esi, %k1
998 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
999 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1000 ; AVX512BW-NEXT: retq
1004 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1005 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1006 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1}
1007 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1008 ; AVX512F-32-NEXT: retl
1017 ; AVX512BW-NEXT: kmovd %esi, %k1
1018 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
1019 ; AVX512BW-NEXT: retq
1023 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1024 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1025 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
1026 ; AVX512F-32-NEXT: retl
1035 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
1036 ; AVX512BW-NEXT: retq
1040 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1041 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0
1042 ; AVX512F-32-NEXT: retl
1053 ; AVX512BW-NEXT: kmovd %esi, %k1
1054 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1055 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1056 ; AVX512BW-NEXT: retq
1060 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1061 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1062 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
1063 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1064 ; AVX512F-32-NEXT: retl
1075 ; AVX512BW-NEXT: kmovd %esi, %k1
1076 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
1077 ; AVX512BW-NEXT: retq
1081 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1082 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1083 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1084 ; AVX512F-32-NEXT: retl
1097 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
1098 ; AVX512BW-NEXT: retq
1102 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
1103 ; AVX512F-32-NEXT: retl
1111 ; AVX512BW-NEXT: kmovq %rdi, %k1
1112 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1113 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1114 ; AVX512BW-NEXT: retq
1118 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1119 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1120 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1121 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1122 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1123 ; AVX512F-32-NEXT: retl
1131 ; AVX512BW-NEXT: kmovq %rdi, %k1
1132 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1133 ; AVX512BW-NEXT: retq
1137 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1138 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1139 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1140 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1141 ; AVX512F-32-NEXT: retl
1149 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0
1150 ; AVX512BW-NEXT: retq
1154 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1155 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0
1156 ; AVX512F-32-NEXT: retl
1165 ; AVX512BW-NEXT: kmovq %rsi, %k1
1166 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
1167 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1168 ; AVX512BW-NEXT: retq
1172 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1173 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1174 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1175 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1176 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
1177 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1178 ; AVX512F-32-NEXT: retl
1187 ; AVX512BW-NEXT: kmovq %rsi, %k1
1188 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
1189 ; AVX512BW-NEXT: retq
1193 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1194 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1195 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1196 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1197 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
1198 ; AVX512F-32-NEXT: retl
1209 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
1210 ; AVX512BW-NEXT: retq
1214 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
1215 ; AVX512F-32-NEXT: retl
1223 ; AVX512BW-NEXT: kmovd %edi, %k1
1224 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1225 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1226 ; AVX512BW-NEXT: retq
1230 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1231 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1232 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1233 ; AVX512F-32-NEXT: retl
1241 ; AVX512BW-NEXT: kmovd %edi, %k1
1242 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1243 ; AVX512BW-NEXT: retq
1247 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1248 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1249 ; AVX512F-32-NEXT: retl
1257 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
1258 ; AVX512BW-NEXT: retq
1262 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1263 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
1264 ; AVX512F-32-NEXT: retl
1273 ; AVX512BW-NEXT: kmovd %esi, %k1
1274 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
1275 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1276 ; AVX512BW-NEXT: retq
1280 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1281 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1282 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
1283 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1284 ; AVX512F-32-NEXT: retl
1293 ; AVX512BW-NEXT: kmovd %esi, %k1
1294 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1295 ; AVX512BW-NEXT: retq
1299 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1300 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1301 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
1302 ; AVX512F-32-NEXT: retl
1313 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
1314 ; AVX512BW-NEXT: retq
1318 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
1319 ; AVX512F-32-NEXT: retl
1327 ; AVX512BW-NEXT: kmovd %edi, %k1
1328 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1329 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1330 ; AVX512BW-NEXT: retq
1334 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1335 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1336 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1337 ; AVX512F-32-NEXT: retl
1345 ; AVX512BW-NEXT: kmovd %edi, %k1
1346 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1347 ; AVX512BW-NEXT: retq
1351 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1352 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1353 ; AVX512F-32-NEXT: retl
1361 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
1362 ; AVX512BW-NEXT: retq
1366 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1367 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
1368 ; AVX512F-32-NEXT: retl
1377 ; AVX512BW-NEXT: kmovd %esi, %k1
1378 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
1379 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1380 ; AVX512BW-NEXT: retq
1384 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1385 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1386 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
1387 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1388 ; AVX512F-32-NEXT: retl
1397 ; AVX512BW-NEXT: kmovd %esi, %k1
1398 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1399 ; AVX512BW-NEXT: retq
1403 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1404 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1405 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
1406 ; AVX512F-32-NEXT: retl
1417 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
1418 ; AVX512BW-NEXT: retq
1422 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
1423 ; AVX512F-32-NEXT: retl
1431 ; AVX512BW-NEXT: kmovd %edi, %k1
1432 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1433 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1434 ; AVX512BW-NEXT: retq
1438 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1439 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1440 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1441 ; AVX512F-32-NEXT: retl
1449 ; AVX512BW-NEXT: kmovd %edi, %k1
1450 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1451 ; AVX512BW-NEXT: retq
1455 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1456 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1457 ; AVX512F-32-NEXT: retl
1465 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
1466 ; AVX512BW-NEXT: retq
1470 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1471 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
1472 ; AVX512F-32-NEXT: retl
1481 ; AVX512BW-NEXT: kmovd %esi, %k1
1482 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
1483 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1484 ; AVX512BW-NEXT: retq
1488 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1489 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1490 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
1491 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1492 ; AVX512F-32-NEXT: retl
1501 ; AVX512BW-NEXT: kmovd %esi, %k1
1502 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1503 ; AVX512BW-NEXT: retq
1507 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1508 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1509 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
1510 ; AVX512F-32-NEXT: retl
1521 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
1522 ; AVX512BW-NEXT: retq
1526 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
1527 ; AVX512F-32-NEXT: retl
1535 ; AVX512BW-NEXT: kmovd %edi, %k1
1536 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1537 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1538 ; AVX512BW-NEXT: retq
1542 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1543 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1544 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1545 ; AVX512F-32-NEXT: retl
1553 ; AVX512BW-NEXT: kmovd %edi, %k1
1554 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1555 ; AVX512BW-NEXT: retq
1559 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1560 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1561 ; AVX512F-32-NEXT: retl
1569 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
1570 ; AVX512BW-NEXT: retq
1574 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1575 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
1576 ; AVX512F-32-NEXT: retl
1585 ; AVX512BW-NEXT: kmovd %esi, %k1
1586 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
1587 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1588 ; AVX512BW-NEXT: retq
1592 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1593 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1594 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
1595 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1596 ; AVX512F-32-NEXT: retl
1605 ; AVX512BW-NEXT: kmovd %esi, %k1
1606 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1607 ; AVX512BW-NEXT: retq
1611 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1612 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1613 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
1614 ; AVX512F-32-NEXT: retl
1627 ; AVX512BW-NEXT: kmovq %rdi, %k1
1628 ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1629 ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
1630 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1631 ; AVX512BW-NEXT: retq
1635 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1636 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1637 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1638 ; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1639 ; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
1640 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1641 ; AVX512F-32-NEXT: retl
1653 ; AVX512BW-NEXT: kmovd %edi, %k1
1654 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1655 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
1656 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1657 ; AVX512BW-NEXT: retq
1661 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1662 ; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1663 ; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
1664 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1665 ; AVX512F-32-NEXT: retl
1677 ; AVX512BW-NEXT: kmovq %rdi, %k1
1678 ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1679 ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
1680 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1681 ; AVX512BW-NEXT: retq
1685 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1686 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1687 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1688 ; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1689 ; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
1690 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1691 ; AVX512F-32-NEXT: retl
1703 ; AVX512BW-NEXT: kmovd %edi, %k1
1704 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1705 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
1706 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1707 ; AVX512BW-NEXT: retq
1711 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1712 ; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1713 ; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
1714 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1715 ; AVX512F-32-NEXT: retl
1727 ; AVX512BW-NEXT: kmovq %rdi, %k1
1728 ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1729 ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
1730 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1731 ; AVX512BW-NEXT: retq
1735 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1736 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1737 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1738 ; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1739 ; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0
1740 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1741 ; AVX512F-32-NEXT: retl
1753 ; AVX512BW-NEXT: kmovd %edi, %k1
1754 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1755 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
1756 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1757 ; AVX512BW-NEXT: retq
1761 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1762 ; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1763 ; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0
1764 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1765 ; AVX512F-32-NEXT: retl
1777 ; AVX512BW-NEXT: kmovq %rdi, %k1
1778 ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
1779 ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
1780 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1781 ; AVX512BW-NEXT: retq
1785 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1786 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1787 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1788 ; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
1789 ; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0
1790 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1791 ; AVX512F-32-NEXT: retl
1803 ; AVX512BW-NEXT: kmovd %edi, %k1
1804 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
1805 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
1806 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1807 ; AVX512BW-NEXT: retq
1811 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1812 ; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
1813 ; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0
1814 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1815 ; AVX512F-32-NEXT: retl
1827 ; AVX512BW-NEXT: kmovd %edi, %k1
1828 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
1829 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1830 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1831 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1832 ; AVX512BW-NEXT: retq
1836 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1837 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
1838 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1839 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1840 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1841 ; AVX512F-32-NEXT: retl
1853 ; AVX512BW-NEXT: kmovd %edi, %k1
1854 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
1855 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1856 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1857 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1858 ; AVX512BW-NEXT: retq
1862 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1863 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
1864 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1865 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1866 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1867 ; AVX512F-32-NEXT: retl
1879 ; AVX512BW-NEXT: kmovd %edi, %k1
1880 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
1881 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1882 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1883 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1884 ; AVX512BW-NEXT: retq
1888 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1889 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
1890 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1891 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1892 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1893 ; AVX512F-32-NEXT: retl
1905 ; AVX512BW-NEXT: kmovq %rdi, %k1
1906 ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
1907 ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0
1908 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1909 ; AVX512BW-NEXT: retq
1913 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1914 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1915 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1916 ; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
1917 ; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0
1918 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1919 ; AVX512F-32-NEXT: retl
1931 ; AVX512BW-NEXT: kmovd %edi, %k1
1932 ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
1933 ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0
1934 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1935 ; AVX512BW-NEXT: retq
1939 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1940 ; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
1941 ; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0
1942 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1943 ; AVX512F-32-NEXT: retl
1955 ; AVX512BW-NEXT: kmovq %rdi, %k1
1956 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1957 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
1958 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1959 ; AVX512BW-NEXT: retq
1963 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1964 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1965 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1966 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1967 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
1968 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1969 ; AVX512F-32-NEXT: retl
1981 ; AVX512BW-NEXT: kmovd %edi, %k1
1982 ; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1}
1983 ; AVX512BW-NEXT: vpabsw %zmm0, %zmm0
1984 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
1985 ; AVX512BW-NEXT: retq
1989 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1990 ; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1}
1991 ; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0
1992 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
1993 ; AVX512F-32-NEXT: retl
2005 ; AVX512BW-NEXT: kmovq %rdi, %k1
2006 ; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1}
2007 ; AVX512BW-NEXT: vpabsb %zmm0, %zmm0
2008 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
2009 ; AVX512BW-NEXT: retq
2013 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2014 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2015 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2016 ; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1}
2017 ; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0
2018 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
2019 ; AVX512F-32-NEXT: retl
2031 ; AVX512BW-NEXT: kmovd %edi, %k1
2032 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2033 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
2034 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2035 ; AVX512BW-NEXT: retq
2039 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2040 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2041 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
2042 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2043 ; AVX512F-32-NEXT: retl
2055 ; AVX512BW-NEXT: kmovd %edi, %k1
2056 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2057 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
2058 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2059 ; AVX512BW-NEXT: retq
2063 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2064 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2065 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
2066 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2067 ; AVX512F-32-NEXT: retl
2079 ; AVX512BW-NEXT: kmovd %edi, %k1
2080 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2081 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
2082 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2083 ; AVX512BW-NEXT: retq
2087 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2088 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2089 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
2090 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2091 ; AVX512F-32-NEXT: retl
2103 ; AVX512BW-NEXT: kmovd %edi, %k1
2104 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
2105 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
2106 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
2107 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2108 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2109 ; AVX512BW-NEXT: retq
2113 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2114 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
2115 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
2116 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0
2117 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2118 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2119 ; AVX512F-32-NEXT: retl
2133 ; AVX512BW-NEXT: kmovd %esi, %k1
2134 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi)
2135 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
2136 ; AVX512BW-NEXT: retq
2140 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2141 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2142 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax)
2143 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1}
2144 ; AVX512F-32-NEXT: retl
2155 ; AVX512BW-NEXT: kmovd %edi, %k1
2156 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
2157 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
2158 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0
2159 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2160 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2161 ; AVX512BW-NEXT: retq
2165 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2166 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
2167 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
2168 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0
2169 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2170 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2171 ; AVX512F-32-NEXT: retl
2185 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
2186 ; AVX512BW-NEXT: kmovd %esi, %k1
2187 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
2188 ; AVX512BW-NEXT: retq
2192 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2193 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2194 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx)
2195 ; AVX512F-32-NEXT: kmovd %eax, %k1
2196 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1}
2197 ; AVX512F-32-NEXT: retl
2208 ; AVX512BW-NEXT: kmovd %edi, %k1
2209 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
2210 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
2211 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
2212 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2213 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2214 ; AVX512BW-NEXT: retq
2218 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2219 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
2220 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
2221 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0
2222 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2223 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2224 ; AVX512F-32-NEXT: retl
2238 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
2239 ; AVX512BW-NEXT: kmovd %esi, %k1
2240 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
2241 ; AVX512BW-NEXT: retq
2245 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2246 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2247 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx)
2248 ; AVX512F-32-NEXT: kmovd %eax, %k1
2249 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1}
2250 ; AVX512F-32-NEXT: retl
2261 ; AVX512BW-NEXT: kmovd %edi, %k1
2262 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2263 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
2264 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2265 ; AVX512BW-NEXT: retq
2269 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2270 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2271 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
2272 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2273 ; AVX512F-32-NEXT: retl
2285 ; AVX512BW-NEXT: kmovw %edi, %k1
2286 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2287 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
2288 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
2289 ; AVX512BW-NEXT: retq
2293 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
2294 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2295 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
2296 ; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0
2297 ; AVX512F-32-NEXT: retl
2309 ; AVX512BW-NEXT: kmovd %edi, %k1
2310 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2311 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2312 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2313 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2314 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2315 ; AVX512BW-NEXT: retq
2319 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2320 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2321 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2322 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2323 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2324 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2325 ; AVX512F-32-NEXT: retl
2339 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
2340 ; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
2341 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2342 ; AVX512BW-NEXT: retq
2346 ; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
2347 ; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
2348 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2349 ; AVX512F-32-NEXT: retl
2361 ; AVX512BW-NEXT: kmovd %edi, %k0
2362 ; AVX512BW-NEXT: kmovd %esi, %k1
2363 ; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
2364 ; AVX512BW-NEXT: kmovd %k0, %eax
2365 ; AVX512BW-NEXT: retq
2369 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2370 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2371 ; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
2372 ; AVX512F-32-NEXT: kmovd %k0, %eax
2373 ; AVX512F-32-NEXT: retl
2383 ; AVX512BW-NEXT: kmovq %rdi, %k0
2384 ; AVX512BW-NEXT: kmovq %rsi, %k1
2385 ; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
2386 ; AVX512BW-NEXT: kmovq %k0, %rax
2387 ; AVX512BW-NEXT: retq
2391 ; AVX512F-32-NEXT: subl $12, %esp
2392 ; AVX512F-32-NEXT: .Ltmp4:
2393 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
2394 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2395 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2396 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
2397 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2398 ; AVX512F-32-NEXT: movl (%esp), %eax
2399 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2400 ; AVX512F-32-NEXT: addl $12, %esp
2401 ; AVX512F-32-NEXT: retl
2411 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
2412 ; AVX512BW-NEXT: kmovq %k0, %rax
2413 ; AVX512BW-NEXT: retq
2417 ; AVX512F-32-NEXT: subl $12, %esp
2418 ; AVX512F-32-NEXT: .Ltmp5:
2419 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
2420 ; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
2421 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2422 ; AVX512F-32-NEXT: movl (%esp), %eax
2423 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2424 ; AVX512F-32-NEXT: addl $12, %esp
2425 ; AVX512F-32-NEXT: retl
2435 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
2436 ; AVX512BW-NEXT: kmovd %k0, %eax
2437 ; AVX512BW-NEXT: retq
2441 ; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
2442 ; AVX512F-32-NEXT: kmovd %k0, %eax
2443 ; AVX512F-32-NEXT: retl
2453 ; AVX512BW-NEXT: kmovq %rdi, %k0
2454 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2455 ; AVX512BW-NEXT: retq
2459 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2460 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2461 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
2462 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
2463 ; AVX512F-32-NEXT: retl
2473 ; AVX512BW-NEXT: kmovd %edi, %k0
2474 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
2475 ; AVX512BW-NEXT: retq
2479 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2480 ; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0
2481 ; AVX512F-32-NEXT: retl
2491 ; AVX512BW-NEXT: kmovd %edi, %k1
2492 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
2493 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
2494 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
2495 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2496 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
2497 ; AVX512BW-NEXT: retq
2501 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2502 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
2503 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
2504 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
2505 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2506 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
2507 ; AVX512F-32-NEXT: retl
2521 ; AVX512BW-NEXT: kmovd %esi, %k1
2522 ; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
2523 ; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
2524 ; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
2525 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2526 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
2527 ; AVX512BW-NEXT: retq
2531 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2532 ; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
2533 ; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
2534 ; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0
2535 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2536 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
2537 ; AVX512F-32-NEXT: retl
2551 ; AVX512BW-NEXT: kmovd %edi, %k1
2552 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
2553 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2554 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
2555 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2556 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2557 ; AVX512BW-NEXT: retq
2561 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2562 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
2563 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2564 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
2565 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2566 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2567 ; AVX512F-32-NEXT: retl
2581 ; AVX512BW-NEXT: kmovd %edi, %k1
2582 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
2583 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
2584 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
2585 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2586 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2587 ; AVX512BW-NEXT: retq
2591 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2592 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
2593 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
2594 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
2595 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2596 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2597 ; AVX512F-32-NEXT: retl
2611 ; AVX512BW-NEXT: kmovd %esi, %k1
2612 ; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
2613 ; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
2614 ; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm0
2615 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2616 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2617 ; AVX512BW-NEXT: retq
2621 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2622 ; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
2623 ; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
2624 ; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0
2625 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2626 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2627 ; AVX512F-32-NEXT: retl
2641 ; AVX512BW-NEXT: kmovd %edi, %k1
2642 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
2643 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
2644 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
2645 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2646 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2647 ; AVX512BW-NEXT: retq
2651 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2652 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
2653 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
2654 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
2655 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2656 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2657 ; AVX512F-32-NEXT: retl
2669 ; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
2670 ; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
2671 ; AVX512BW-NEXT: retq
2675 ; AVX512F-32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
2676 ; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
2677 ; AVX512F-32-NEXT: retl
2689 ; AVX512BW-NEXT: kmovd %edi, %k1
2690 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
2691 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
2692 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
2693 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2694 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2695 ; AVX512BW-NEXT: retq
2699 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2700 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
2701 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
2702 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
2703 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2704 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2705 ; AVX512F-32-NEXT: retl
2719 ; AVX512BW-NEXT: kmovd %esi, %k1
2720 ; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
2721 ; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
2722 ; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
2723 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2724 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2725 ; AVX512BW-NEXT: retq
2729 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2730 ; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
2731 ; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
2732 ; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0
2733 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2734 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2735 ; AVX512F-32-NEXT: retl
2749 ; AVX512BW-NEXT: kmovd %edi, %k1
2750 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
2751 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2752 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
2753 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2754 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2755 ; AVX512BW-NEXT: retq
2759 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2760 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
2761 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2762 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
2763 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2764 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2765 ; AVX512F-32-NEXT: retl
2779 ; AVX512BW-NEXT: kmovd %edi, %k1
2780 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2781 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2782 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2783 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2784 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2785 ; AVX512BW-NEXT: retq
2789 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2790 ; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2791 ; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2792 ; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2793 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2794 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2795 ; AVX512F-32-NEXT: retl
2809 ; AVX512BW-NEXT: kmovd %edi, %k1
2810 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
2811 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
2812 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
2813 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2814 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2815 ; AVX512BW-NEXT: retq
2819 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2820 ; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
2821 ; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
2822 ; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0
2823 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2824 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2825 ; AVX512F-32-NEXT: retl
2839 ; AVX512BW-NEXT: kmovd %edi, %k1
2840 ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
2841 ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
2842 ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
2843 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2844 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2845 ; AVX512BW-NEXT: retq
2849 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2850 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
2851 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
2852 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0
2853 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2854 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2855 ; AVX512F-32-NEXT: retl
2869 ; AVX512BW-NEXT: kmovq %rdi, %k1
2870 ; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
2871 ; AVX512BW-NEXT: kmovq %k0, %rcx
2872 ; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0
2873 ; AVX512BW-NEXT: kmovq %k0, %rax
2874 ; AVX512BW-NEXT: addq %rcx, %rax
2875 ; AVX512BW-NEXT: retq
2879 ; AVX512F-32-NEXT: subl $20, %esp
2880 ; AVX512F-32-NEXT: .Ltmp6:
2881 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
2882 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2883 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2884 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2885 ; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
2886 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2887 ; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0
2888 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
2889 ; AVX512F-32-NEXT: movl (%esp), %eax
2890 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2891 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
2892 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
2893 ; AVX512F-32-NEXT: addl $20, %esp
2894 ; AVX512F-32-NEXT: retl
2906 ; AVX512BW-NEXT: kmovd %edi, %k1
2907 ; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1}
2908 ; AVX512BW-NEXT: kmovd %k0, %ecx
2909 ; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0
2910 ; AVX512BW-NEXT: kmovd %k0, %eax
2911 ; AVX512BW-NEXT: addl %ecx, %eax
2912 ; AVX512BW-NEXT: retq
2916 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2917 ; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1}
2918 ; AVX512F-32-NEXT: kmovd %k0, %ecx
2919 ; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0
2920 ; AVX512F-32-NEXT: kmovd %k0, %eax
2921 ; AVX512F-32-NEXT: addl %ecx, %eax
2922 ; AVX512F-32-NEXT: retl
2934 ; AVX512BW-NEXT: kmovq %rdi, %k1
2935 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
2936 ; AVX512BW-NEXT: kmovq %k0, %rcx
2937 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0
2938 ; AVX512BW-NEXT: kmovq %k0, %rax
2939 ; AVX512BW-NEXT: addq %rcx, %rax
2940 ; AVX512BW-NEXT: retq
2944 ; AVX512F-32-NEXT: subl $20, %esp
2945 ; AVX512F-32-NEXT: .Ltmp7:
2946 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
2947 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2948 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2949 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2950 ; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
2951 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2952 ; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0
2953 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
2954 ; AVX512F-32-NEXT: movl (%esp), %eax
2955 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2956 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
2957 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
2958 ; AVX512F-32-NEXT: addl $20, %esp
2959 ; AVX512F-32-NEXT: retl
2971 ; AVX512BW-NEXT: kmovd %edi, %k1
2972 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1}
2973 ; AVX512BW-NEXT: kmovd %k0, %ecx
2974 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0
2975 ; AVX512BW-NEXT: kmovd %k0, %eax
2976 ; AVX512BW-NEXT: addl %ecx, %eax
2977 ; AVX512BW-NEXT: retq
2981 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2982 ; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1}
2983 ; AVX512F-32-NEXT: kmovd %k0, %ecx
2984 ; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0
2985 ; AVX512F-32-NEXT: kmovd %k0, %eax
2986 ; AVX512F-32-NEXT: addl %ecx, %eax
2987 ; AVX512F-32-NEXT: retl
2999 ; AVX512BW-NEXT: kmovq %rsi, %k1
3000 ; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1}
3001 ; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z}
3002 ; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2
3003 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
3004 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
3005 ; AVX512BW-NEXT: retq
3009 ; AVX512F-32-NEXT: movb {{[0-9]+}}(%esp), %al
3010 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
3011 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
3012 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
3013 ; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
3014 ; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
3015 ; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
3016 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
3017 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
3018 ; AVX512F-32-NEXT: retl
3032 ; AVX512BW-NEXT: kmovd %esi, %k1
3033 ; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1}
3034 ; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z}
3035 ; AVX512BW-NEXT: vpbroadcastw %di, %zmm2
3036 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
3037 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
3038 ; AVX512BW-NEXT: retq
3042 ; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
3043 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
3044 ; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
3045 ; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
3046 ; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
3047 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
3048 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
3049 ; AVX512F-32-NEXT: retl