Lines Matching full:next
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
15 ; SSE-NEXT: retq
19 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
20 ; AVX-NEXT: retq
27 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
28 ; SSE-NEXT: retq
32 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
33 ; AVX-NEXT: retq
40 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
41 ; SSE2-NEXT: retq
45 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
46 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
47 ; SSSE3-NEXT: retq
51 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
52 ; SSE41-NEXT: movdqa %xmm1, %xmm0
53 ; SSE41-NEXT: retq
57 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
58 ; AVX-NEXT: retq
66 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
68 ; SSE-NEXT: retq
72 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
73 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
74 ; AVX1-NEXT: retq
78 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
79 ; AVX2-NEXT: retq
86 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
87 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
88 ; SSE-NEXT: retq
92 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
93 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
94 ; AVX-NEXT: retq
101 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
102 ; SSE-NEXT: retq
106 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
107 ; AVX-NEXT: retq
114 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
115 ; SSE-NEXT: retq
119 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
120 ; AVX-NEXT: retq
127 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
128 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
129 ; SSE-NEXT: retq
133 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
134 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
135 ; AVX-NEXT: retq
142 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
143 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
144 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
145 ; SSE2-NEXT: retq
149 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
150 ; SSSE3-NEXT: retq
154 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
155 ; SSE41-NEXT: retq
159 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
160 ; AVX-NEXT: retq
167 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
168 ; SSE-NEXT: retq
172 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
173 ; AVX-NEXT: retq
180 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
181 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
182 ; SSE-NEXT: retq
186 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
187 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
188 ; AVX-NEXT: retq
195 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
196 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
197 ; SSE-NEXT: retq
201 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
202 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
203 ; AVX-NEXT: retq
210 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
211 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
212 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
213 ; SSE2-NEXT: retq
217 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
218 ; SSSE3-NEXT: retq
222 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
223 ; SSE41-NEXT: retq
227 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
228 ; AVX-NEXT: retq
236 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
237 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
238 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
239 ; SSE2-NEXT: retq
243 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
244 ; SSSE3-NEXT: retq
248 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
249 ; SSE41-NEXT: retq
253 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
254 ; AVX-NEXT: retq
261 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
262 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
263 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
264 ; SSE2-NEXT: retq
268 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
269 ; SSSE3-NEXT: retq
273 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
274 ; SSE41-NEXT: retq
278 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
279 ; AVX-NEXT: retq
286 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
287 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
288 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
289 ; SSE2-NEXT: retq
293 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
294 ; SSSE3-NEXT: retq
298 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
299 ; SSE41-NEXT: retq
303 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
304 ; AVX-NEXT: retq
311 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
312 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
313 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
314 ; SSE2-NEXT: retq
318 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
319 ; SSSE3-NEXT: retq
323 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
324 ; SSE41-NEXT: retq
328 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
329 ; AVX-NEXT: retq
336 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
337 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
338 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
339 ; SSE2-NEXT: retq
343 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
344 ; SSSE3-NEXT: retq
348 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
349 ; SSE41-NEXT: retq
353 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
354 ; AVX-NEXT: retq
362 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
363 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
364 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
365 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
366 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
367 ; SSE2-NEXT: retq
371 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
372 ; SSSE3-NEXT: retq
376 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
377 ; SSE41-NEXT: retq
381 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
382 ; AVX-NEXT: retq
389 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
390 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
391 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
392 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
393 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
394 ; SSE2-NEXT: retq
398 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
399 ; SSSE3-NEXT: retq
403 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
404 ; SSE41-NEXT: retq
408 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
409 ; AVX-NEXT: retq
416 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
417 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
418 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
419 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
420 ; SSE2-NEXT: retq
424 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
425 ; SSSE3-NEXT: retq
429 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
430 ; SSE41-NEXT: retq
434 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
435 ; AVX-NEXT: retq
443 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
444 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
445 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
446 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
447 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
448 ; SSE2-NEXT: retq
452 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
453 ; SSSE3-NEXT: retq
457 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
458 ; SSE41-NEXT: retq
462 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
463 ; AVX-NEXT: retq
471 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
472 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
473 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
474 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
475 ; SSE2-NEXT: retq
479 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
480 ; SSSE3-NEXT: retq
484 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
485 ; SSE41-NEXT: retq
489 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
490 ; AVX-NEXT: retq
498 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
499 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
500 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
501 ; SSE2-NEXT: retq
505 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
506 ; SSSE3-NEXT: retq
510 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
511 ; SSE41-NEXT: retq
515 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
516 ; AVX-NEXT: retq
523 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
524 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
525 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
526 ; SSE2-NEXT: retq
530 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
531 ; SSSE3-NEXT: retq
535 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
536 ; SSE41-NEXT: retq
540 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
541 ; AVX-NEXT: retq
548 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
549 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
550 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
551 ; SSE2-NEXT: retq
555 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
556 ; SSSE3-NEXT: retq
560 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
561 ; SSE41-NEXT: retq
565 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
566 ; AVX-NEXT: retq
573 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
574 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
575 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
576 ; SSE2-NEXT: retq
580 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
581 ; SSSE3-NEXT: retq
585 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
586 ; SSE41-NEXT: retq
590 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
591 ; AVX-NEXT: retq
598 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
599 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
600 ; SSE-NEXT: retq
604 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
605 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
606 ; AVX-NEXT: retq
614 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
615 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
616 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
617 ; SSE2-NEXT: retq
621 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
622 ; SSSE3-NEXT: retq
626 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
627 ; SSE41-NEXT: retq
631 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
632 ; AVX-NEXT: retq
639 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
640 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
641 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
642 ; SSE2-NEXT: retq
646 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
647 ; SSSE3-NEXT: retq
651 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
652 ; SSE41-NEXT: retq
656 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
657 ; AVX-NEXT: retq
664 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
665 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
666 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
667 ; SSE2-NEXT: retq
671 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
672 ; SSSE3-NEXT: retq
676 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
677 ; SSE41-NEXT: retq
681 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
682 ; AVX-NEXT: retq
690 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
691 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
692 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
693 ; SSE2-NEXT: retq
697 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
698 ; SSSE3-NEXT: retq
702 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
703 ; SSE41-NEXT: retq
707 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
708 ; AVX-NEXT: retq
716 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
717 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
718 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
719 ; SSE2-NEXT: retq
723 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
724 ; SSSE3-NEXT: retq
728 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
729 ; SSE41-NEXT: retq
733 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
734 ; AVX-NEXT: retq
742 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
743 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
744 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
745 ; SSE2-NEXT: retq
749 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
750 ; SSSE3-NEXT: retq
754 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
755 ; SSE41-NEXT: retq
759 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
760 ; AVX-NEXT: retq
768 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
769 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
770 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
771 ; SSE2-NEXT: retq
775 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
776 ; SSSE3-NEXT: retq
780 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
781 ; SSE41-NEXT: retq
785 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
786 ; AVX-NEXT: retq
794 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
795 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
796 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
797 ; SSE2-NEXT: retq
801 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
802 ; SSSE3-NEXT: retq
806 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
807 ; SSE41-NEXT: retq
811 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
812 ; AVX-NEXT: retq
820 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
821 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
822 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
823 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
824 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
825 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
826 ; SSE2-NEXT: retq
830 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
831 ; SSSE3-NEXT: retq
835 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
836 ; SSE41-NEXT: retq
840 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
841 ; AVX-NEXT: retq
849 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
850 ; SSE-NEXT: retq
854 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
855 ; AVX-NEXT: retq
863 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
864 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
865 ; SSE-NEXT: retq
869 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
870 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
871 ; AVX-NEXT: retq
879 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
880 ; SSE-NEXT: retq
884 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
885 ; AVX-NEXT: retq
893 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
894 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
895 ; SSE-NEXT: retq
899 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
900 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
901 ; AVX-NEXT: retq
909 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
910 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
911 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
912 ; SSE-NEXT: retq
916 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
917 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
918 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
919 ; AVX-NEXT: retq
927 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
928 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
929 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
930 ; SSE-NEXT: retq
934 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
935 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
936 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
937 ; AVX-NEXT: retq
945 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
946 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
947 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
948 ; SSE-NEXT: retq
952 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
953 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
954 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
955 ; AVX-NEXT: retq
963 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
964 ; SSE-NEXT: movdqa %xmm1, %xmm0
965 ; SSE-NEXT: retq
969 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
970 ; AVX-NEXT: retq
977 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
978 ; SSE-NEXT: movdqa %xmm1, %xmm0
979 ; SSE-NEXT: retq
983 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
984 ; AVX-NEXT: retq
992 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
993 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
994 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
995 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
996 ; SSE-NEXT: retq
1000 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1001 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1002 ; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1003 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1004 ; AVX-NEXT: retq
1012 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1013 ; SSE2-NEXT: pand %xmm2, %xmm0
1014 ; SSE2-NEXT: pandn %xmm1, %xmm2
1015 ; SSE2-NEXT: por %xmm0, %xmm2
1016 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1017 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1018 ; SSE2-NEXT: retq
1022 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1023 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1024 ; SSSE3-NEXT: por %xmm1, %xmm0
1025 ; SSSE3-NEXT: retq
1029 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1030 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1031 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1032 ; SSE41-NEXT: retq
1036 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1037 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1038 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1039 ; AVX-NEXT: retq
1047 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1048 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
1049 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1050 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1051 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1052 ; SSE2-NEXT: retq
1056 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1057 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1058 ; SSSE3-NEXT: por %xmm1, %xmm0
1059 ; SSSE3-NEXT: retq
1063 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1064 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1065 ; SSE41-NEXT: retq
1069 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1070 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1071 ; AVX1-NEXT: retq
1075 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1076 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1077 ; AVX2-NEXT: retq
1084 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1085 ; SSE-NEXT: retq
1089 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1090 ; AVX-NEXT: retq
1098 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1099 ; SSE2-NEXT: pand %xmm2, %xmm0
1100 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1101 ; SSE2-NEXT: pandn %xmm1, %xmm2
1102 ; SSE2-NEXT: por %xmm2, %xmm0
1103 ; SSE2-NEXT: retq
1107 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1108 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1109 ; SSSE3-NEXT: por %xmm1, %xmm0
1110 ; SSSE3-NEXT: retq
1114 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1115 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1116 ; SSE41-NEXT: retq
1120 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1121 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1122 ; AVX-NEXT: retq
1130 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1131 ; SSE2-NEXT: pand %xmm2, %xmm1
1132 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1133 ; SSE2-NEXT: pandn %xmm0, %xmm2
1134 ; SSE2-NEXT: por %xmm1, %xmm2
1135 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1136 ; SSE2-NEXT: retq
1140 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1141 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1142 ; SSSE3-NEXT: por %xmm1, %xmm0
1143 ; SSSE3-NEXT: retq
1147 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1148 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1149 ; SSE41-NEXT: retq
1153 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1154 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1155 ; AVX1-NEXT: retq
1159 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1160 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1161 ; AVX2-NEXT: retq
1169 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1170 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1171 ; SSE2-NEXT: pand %xmm2, %xmm1
1172 ; SSE2-NEXT: pandn %xmm0, %xmm2
1173 ; SSE2-NEXT: por %xmm1, %xmm2
1174 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1175 ; SSE2-NEXT: retq
1179 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1180 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1181 ; SSSE3-NEXT: por %xmm1, %xmm0
1182 ; SSSE3-NEXT: retq
1186 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1187 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1188 ; SSE41-NEXT: retq
1192 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1193 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1194 ; AVX-NEXT: retq
1202 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1203 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
1204 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1205 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1206 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1207 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1208 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1209 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1210 ; SSE2-NEXT: retq
1214 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1215 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1216 ; SSSE3-NEXT: por %xmm1, %xmm0
1217 ; SSSE3-NEXT: retq
1221 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1222 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1223 ; SSE41-NEXT: retq
1227 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1228 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1229 ; AVX1-NEXT: retq
1233 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1234 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1235 ; AVX2-NEXT: retq
1243 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1244 ; SSE2-NEXT: andps %xmm2, %xmm0
1245 ; SSE2-NEXT: andnps %xmm1, %xmm2
1246 ; SSE2-NEXT: orps %xmm2, %xmm0
1247 ; SSE2-NEXT: retq
1251 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1252 ; SSSE3-NEXT: andps %xmm2, %xmm0
1253 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1254 ; SSSE3-NEXT: orps %xmm2, %xmm0
1255 ; SSSE3-NEXT: retq
1259 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1260 ; SSE41-NEXT: retq
1264 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1265 ; AVX-NEXT: retq
1273 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1274 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1275 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1276 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1277 ; SSE2-NEXT: pand %xmm1, %xmm0
1278 ; SSE2-NEXT: pandn %xmm2, %xmm1
1279 ; SSE2-NEXT: por %xmm0, %xmm1
1280 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1281 ; SSE2-NEXT: retq
1285 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1286 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1287 ; SSSE3-NEXT: por %xmm1, %xmm0
1288 ; SSSE3-NEXT: retq
1292 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1293 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1294 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1295 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1296 ; SSE41-NEXT: retq
1300 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1301 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1302 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1303 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1304 ; AVX1-NEXT: retq
1308 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
1309 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1310 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1311 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1312 ; AVX2-NEXT: retq
1320 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
1321 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
1322 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
1323 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
1324 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1325 ; SSE2-NEXT: retq
1329 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1330 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1331 ; SSSE3-NEXT: por %xmm1, %xmm0
1332 ; SSSE3-NEXT: retq
1336 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1337 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1338 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1339 ; SSE41-NEXT: retq
1343 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1344 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1345 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1346 ; AVX1-NEXT: retq
1350 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1351 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1352 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1353 ; AVX2-NEXT: retq
1361 ; SSE-NEXT: movzwl %di, %eax
1362 ; SSE-NEXT: movd %eax, %xmm0
1363 ; SSE-NEXT: retq
1367 ; AVX-NEXT: movzwl %di, %eax
1368 ; AVX-NEXT: vmovd %eax, %xmm0
1369 ; AVX-NEXT: retq
1378 ; SSE-NEXT: pxor %xmm0, %xmm0
1379 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1380 ; SSE-NEXT: retq
1384 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1385 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1386 ; AVX-NEXT: retq
1395 ; SSE-NEXT: pxor %xmm0, %xmm0
1396 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1397 ; SSE-NEXT: retq
1401 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1402 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1403 ; AVX-NEXT: retq
1412 ; SSE-NEXT: pxor %xmm0, %xmm0
1413 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1414 ; SSE-NEXT: retq
1418 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1419 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1420 ; AVX-NEXT: retq
1429 ; SSE-NEXT: pxor %xmm0, %xmm0
1430 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1431 ; SSE-NEXT: retq
1435 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1436 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1437 ; AVX-NEXT: retq
1446 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1447 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1448 ; SSE2-NEXT: por %xmm1, %xmm0
1449 ; SSE2-NEXT: retq
1453 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1454 ; SSSE3-NEXT: retq
1458 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1459 ; SSE41-NEXT: retq
1463 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1464 ; AVX-NEXT: retq
1472 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1473 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1474 ; SSE2-NEXT: por %xmm1, %xmm0
1475 ; SSE2-NEXT: retq
1479 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1480 ; SSSE3-NEXT: retq
1484 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1485 ; SSE41-NEXT: retq
1489 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1490 ; AVX-NEXT: retq
1498 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1499 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1500 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1501 ; SSE2-NEXT: por %xmm1, %xmm0
1502 ; SSE2-NEXT: retq
1506 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1507 ; SSSE3-NEXT: retq
1511 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1512 ; SSE41-NEXT: retq
1516 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1517 ; AVX-NEXT: retq
1525 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1526 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1527 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1528 ; SSE2-NEXT: por %xmm1, %xmm0
1529 ; SSE2-NEXT: retq
1533 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1534 ; SSSE3-NEXT: retq
1538 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1539 ; SSE41-NEXT: retq
1543 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1544 ; AVX-NEXT: retq
1552 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1553 ; SSE-NEXT: retq
1557 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1558 ; AVX-NEXT: retq
1566 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1567 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1568 ; SSE2-NEXT: por %xmm1, %xmm0
1569 ; SSE2-NEXT: retq
1573 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1574 ; SSSE3-NEXT: retq
1578 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1579 ; SSE41-NEXT: retq
1583 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1584 ; AVX-NEXT: retq
1592 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1593 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1594 ; SSE2-NEXT: por %xmm1, %xmm0
1595 ; SSE2-NEXT: retq
1599 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1600 ; SSSE3-NEXT: retq
1604 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1605 ; SSE41-NEXT: retq
1609 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1610 ; AVX-NEXT: retq
1618 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1619 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1620 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1621 ; SSE2-NEXT: por %xmm1, %xmm0
1622 ; SSE2-NEXT: retq
1626 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1627 ; SSSE3-NEXT: retq
1631 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1632 ; SSE41-NEXT: retq
1636 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1637 ; AVX-NEXT: retq
1645 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1646 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1647 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1648 ; SSE2-NEXT: por %xmm1, %xmm0
1649 ; SSE2-NEXT: retq
1653 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1654 ; SSSE3-NEXT: retq
1658 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1659 ; SSE41-NEXT: retq
1663 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1664 ; AVX-NEXT: retq
1672 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1673 ; SSE-NEXT: retq
1677 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1678 ; AVX-NEXT: retq
1686 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1687 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1688 ; SSE2-NEXT: por %xmm1, %xmm0
1689 ; SSE2-NEXT: retq
1693 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1694 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1695 ; SSSE3-NEXT: retq
1699 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1700 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1701 ; SSE41-NEXT: retq
1705 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1706 ; AVX-NEXT: retq
1714 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1715 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1716 ; SSE2-NEXT: por %xmm1, %xmm0
1717 ; SSE2-NEXT: retq
1721 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1722 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1723 ; SSSE3-NEXT: retq
1727 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1728 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1729 ; SSE41-NEXT: retq
1733 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1734 ; AVX-NEXT: retq
1742 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1743 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1744 ; SSE2-NEXT: por %xmm1, %xmm0
1745 ; SSE2-NEXT: retq
1749 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1750 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1751 ; SSSE3-NEXT: retq
1755 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1756 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1757 ; SSE41-NEXT: retq
1761 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1762 ; AVX-NEXT: retq
1770 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1771 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1772 ; SSE2-NEXT: por %xmm1, %xmm0
1773 ; SSE2-NEXT: retq
1777 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1778 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1779 ; SSSE3-NEXT: retq
1783 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1784 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1785 ; SSE41-NEXT: retq
1789 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1790 ; AVX-NEXT: retq
1798 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1799 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1800 ; SSE2-NEXT: retq
1804 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1805 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1806 ; SSSE3-NEXT: retq
1810 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1811 ; SSE41-NEXT: retq
1815 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1816 ; AVX-NEXT: retq
1824 ; SSE2-NEXT: pxor %xmm1, %xmm1
1825 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1826 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1827 ; SSE2-NEXT: retq
1831 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1832 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1833 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1834 ; SSSE3-NEXT: retq
1838 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1839 ; SSE41-NEXT: retq
1843 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1844 ; AVX-NEXT: retq
1852 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1853 ; SSE2-NEXT: retq
1857 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1858 ; SSSE3-NEXT: retq
1862 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1863 ; SSE41-NEXT: retq
1867 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1868 ; AVX-NEXT: retq
1876 ; SSE2-NEXT: pxor %xmm1, %xmm1
1877 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1878 ; SSE2-NEXT: retq
1882 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1883 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1884 ; SSSE3-NEXT: retq
1888 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1889 ; SSE41-NEXT: retq
1893 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1894 ; AVX-NEXT: retq
1905 ; SSE-NEXT: pslld $16, %xmm0
1906 ; SSE-NEXT: retq
1910 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
1911 ; AVX-NEXT: retq
1919 ; SSE-NEXT: psllq $48, %xmm0
1920 ; SSE-NEXT: retq
1924 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
1925 ; AVX-NEXT: retq
1933 ; SSE-NEXT: psllq $32, %xmm0
1934 ; SSE-NEXT: retq
1938 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
1939 ; AVX-NEXT: retq
1947 ; SSE-NEXT: psllq $16, %xmm0
1948 ; SSE-NEXT: retq
1952 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
1953 ; AVX-NEXT: retq
1961 ; SSE-NEXT: psrld $16, %xmm0
1962 ; SSE-NEXT: retq
1966 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
1967 ; AVX-NEXT: retq
1975 ; SSE-NEXT: psrlq $16, %xmm0
1976 ; SSE-NEXT: retq
1980 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
1981 ; AVX-NEXT: retq
1989 ; SSE-NEXT: psrlq $32, %xmm0
1990 ; SSE-NEXT: retq
1994 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
1995 ; AVX-NEXT: retq
2003 ; SSE-NEXT: psrlq $48, %xmm0
2004 ; SSE-NEXT: retq
2008 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2009 ; AVX-NEXT: retq
2017 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2018 ; SSE-NEXT: retq
2022 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2023 ; AVX-NEXT: retq
2031 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2032 ; SSE2-NEXT: retq
2036 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2037 ; SSSE3-NEXT: retq
2041 ; SSE41-NEXT: pxor %xmm1, %xmm1
2042 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2043 ; SSE41-NEXT: retq
2047 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2048 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2049 ; AVX-NEXT: retq
2057 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2058 ; SSE2-NEXT: retq
2062 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2063 ; SSSE3-NEXT: retq
2067 ; SSE41-NEXT: pxor %xmm1, %xmm1
2068 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2069 ; SSE41-NEXT: retq
2073 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2074 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2075 ; AVX-NEXT: retq
2083 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2084 ; SSE2-NEXT: retq
2088 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2089 ; SSSE3-NEXT: retq
2093 ; SSE41-NEXT: pxor %xmm1, %xmm1
2094 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
2095 ; SSE41-NEXT: retq
2099 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2100 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
2101 ; AVX-NEXT: retq
2109 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2110 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2111 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2112 ; SSE-NEXT: movdqa %xmm1, %xmm0
2113 ; SSE-NEXT: retq
2117 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2118 ; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2119 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2120 ; AVX-NEXT: retq
2128 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2129 ; SSE-NEXT: retq
2133 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2134 ; AVX-NEXT: retq
2143 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2144 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2145 ; SSE2-NEXT: movaps %xmm1, %xmm0
2146 ; SSE2-NEXT: retq
2150 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2151 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2152 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2153 ; SSSE3-NEXT: retq
2157 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2158 ; SSE41-NEXT: retq
2162 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2163 ; AVX1-NEXT: retq
2167 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2168 ; AVX2-NEXT: retq
2181 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2182 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2183 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2184 ; SSE2-NEXT: retq
2188 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2189 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2190 ; SSSE3-NEXT: retq
2194 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2195 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2196 ; SSE41-NEXT: retq
2200 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2201 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2202 ; AVX1-NEXT: retq
2206 ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2207 ; AVX2-NEXT: retq
2218 ; SSE2-NEXT: movswl (%rdi), %eax
2219 ; SSE2-NEXT: movd %eax, %xmm0
2220 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2221 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2222 ; SSE2-NEXT: retq
2226 ; SSSE3-NEXT: movswl (%rdi), %eax
2227 ; SSSE3-NEXT: movd %eax, %xmm0
2228 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2229 ; SSSE3-NEXT: retq
2233 ; SSE41-NEXT: movswl (%rdi), %eax
2234 ; SSE41-NEXT: movd %eax, %xmm0
2235 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2236 ; SSE41-NEXT: retq
2240 ; AVX1-NEXT: movswl (%rdi), %eax
2241 ; AVX1-NEXT: vmovd %eax, %xmm0
2242 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2243 ; AVX1-NEXT: retq
2247 ; AVX2-NEXT: movswl (%rdi), %eax
2248 ; AVX2-NEXT: vmovd %eax, %xmm0
2249 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2250 ; AVX2-NEXT: retq
2262 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2263 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2264 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2265 ; SSE2-NEXT: retq
2269 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2270 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2271 ; SSSE3-NEXT: retq
2275 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2276 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2277 ; SSE41-NEXT: retq
2281 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2282 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2283 ; AVX1-NEXT: retq
2287 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
2288 ; AVX2-NEXT: retq
2299 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2300 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
2301 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2302 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2303 ; SSE2-NEXT: retq
2307 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2308 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2309 ; SSSE3-NEXT: retq
2313 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2314 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2315 ; SSE41-NEXT: retq
2319 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2320 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2321 ; AVX1-NEXT: retq
2325 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
2326 ; AVX2-NEXT: retq
2337 ; SSE2-NEXT: movswl (%rdi), %eax
2338 ; SSE2-NEXT: movd %eax, %xmm0
2339 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2340 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2341 ; SSE2-NEXT: retq
2345 ; SSSE3-NEXT: movswl (%rdi), %eax
2346 ; SSSE3-NEXT: movd %eax, %xmm0
2347 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2348 ; SSSE3-NEXT: retq
2352 ; SSE41-NEXT: movswl (%rdi), %eax
2353 ; SSE41-NEXT: movd %eax, %xmm0
2354 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2355 ; SSE41-NEXT: retq
2359 ; AVX1-NEXT: movswl (%rdi), %eax
2360 ; AVX1-NEXT: vmovd %eax, %xmm0
2361 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2362 ; AVX1-NEXT: retq
2366 ; AVX2-NEXT: movswl (%rdi), %eax
2367 ; AVX2-NEXT: shrl $16, %eax
2368 ; AVX2-NEXT: vmovd %eax, %xmm0
2369 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2370 ; AVX2-NEXT: retq
2382 ; SSE2-NEXT: movswl (%rdi), %eax
2383 ; SSE2-NEXT: movd %eax, %xmm0
2384 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
2385 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2386 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2387 ; SSE2-NEXT: retq
2391 ; SSSE3-NEXT: movswl (%rdi), %eax
2392 ; SSSE3-NEXT: movd %eax, %xmm0
2393 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2394 ; SSSE3-NEXT: retq
2398 ; SSE41-NEXT: movswl (%rdi), %eax
2399 ; SSE41-NEXT: movd %eax, %xmm0
2400 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2401 ; SSE41-NEXT: retq
2405 ; AVX1-NEXT: movswl (%rdi), %eax
2406 ; AVX1-NEXT: vmovd %eax, %xmm0
2407 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2408 ; AVX1-NEXT: retq
2412 ; AVX2-NEXT: movswl (%rdi), %eax
2413 ; AVX2-NEXT: shrl $16, %eax
2414 ; AVX2-NEXT: vmovd %eax, %xmm0
2415 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2416 ; AVX2-NEXT: retq