Lines Matching full:next
15 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
16 ; CHECK-NEXT: movzwl (%rdi,%rdx), %ecx
17 ; CHECK-NEXT: movd %ecx, %xmm0
18 ; CHECK-NEXT: movzwl (%rsi,%rdx), %ecx
19 ; CHECK-NEXT: movd %ecx, %xmm1
20 ; CHECK-NEXT: pxor %xmm2, %xmm2
21 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
22 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
23 ; CHECK-NEXT: pmullw %xmm0, %xmm1
24 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
25 ; CHECK-NEXT: movq %xmm1, (%rax,%rdx,4)
26 ; CHECK-NEXT: retq
53 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
54 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
55 ; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
56 ; CHECK-NEXT: pxor %xmm2, %xmm2
57 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
58 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
59 ; CHECK-NEXT: pmullw %xmm0, %xmm1
60 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
61 ; CHECK-NEXT: movdqu %xmm1, (%rax,%rdx,4)
62 ; CHECK-NEXT: retq
89 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
90 ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
91 ; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
92 ; CHECK-NEXT: pxor %xmm2, %xmm2
93 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
94 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
95 ; CHECK-NEXT: pmullw %xmm0, %xmm1
96 ; CHECK-NEXT: movdqa %xmm1, %xmm0
97 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
98 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
99 ; CHECK-NEXT: movdqu %xmm1, 16(%rax,%rdx,4)
100 ; CHECK-NEXT: movdqu %xmm0, (%rax,%rdx,4)
101 ; CHECK-NEXT: retq
128 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
129 ; CHECK-NEXT: movdqu (%rdi,%rdx), %xmm0
130 ; CHECK-NEXT: movdqu (%rsi,%rdx), %xmm1
131 ; CHECK-NEXT: pxor %xmm2, %xmm2
132 ; CHECK-NEXT: movdqa %xmm0, %xmm3
133 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
134 ; CHECK-NEXT: movdqa %xmm1, %xmm4
135 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
136 ; CHECK-NEXT: pmullw %xmm3, %xmm4
137 ; CHECK-NEXT: movdqa %xmm4, %xmm3
138 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
139 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
140 ; CHECK-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
141 ; CHECK-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
142 ; CHECK-NEXT: pmullw %xmm0, %xmm1
143 ; CHECK-NEXT: movdqa %xmm1, %xmm0
144 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
145 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
146 ; CHECK-NEXT: movdqu %xmm1, 48(%rax,%rdx,4)
147 ; CHECK-NEXT: movdqu %xmm0, 32(%rax,%rdx,4)
148 ; CHECK-NEXT: movdqu %xmm4, 16(%rax,%rdx,4)
149 ; CHECK-NEXT: movdqu %xmm3, (%rax,%rdx,4)
150 ; CHECK-NEXT: retq
177 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
178 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
179 ; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
180 ; CHECK-NEXT: movdqa %xmm1, %xmm2
181 ; CHECK-NEXT: pmulhuw %xmm0, %xmm2
182 ; CHECK-NEXT: pmullw %xmm0, %xmm1
183 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
184 ; CHECK-NEXT: movq %xmm1, (%rax,%rdx,4)
185 ; CHECK-NEXT: retq
212 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
213 ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
214 ; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
215 ; CHECK-NEXT: movdqa %xmm1, %xmm2
216 ; CHECK-NEXT: pmulhuw %xmm0, %xmm2
217 ; CHECK-NEXT: pmullw %xmm0, %xmm1
218 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
219 ; CHECK-NEXT: movdqu %xmm1, (%rax,%rdx,4)
220 ; CHECK-NEXT: retq
247 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
248 ; CHECK-NEXT: movdqu (%rdi,%rdx), %xmm0
249 ; CHECK-NEXT: movdqu (%rsi,%rdx), %xmm1
250 ; CHECK-NEXT: movdqa %xmm1, %xmm2
251 ; CHECK-NEXT: pmulhuw %xmm0, %xmm2
252 ; CHECK-NEXT: pmullw %xmm0, %xmm1
253 ; CHECK-NEXT: movdqa %xmm1, %xmm0
254 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
255 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
256 ; CHECK-NEXT: movdqu %xmm1, 16(%rax,%rdx,4)
257 ; CHECK-NEXT: movdqu %xmm0, (%rax,%rdx,4)
258 ; CHECK-NEXT: retq
285 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
286 ; CHECK-NEXT: movdqu (%rdi,%rdx), %xmm0
287 ; CHECK-NEXT: movdqu 16(%rdi,%rdx), %xmm1
288 ; CHECK-NEXT: movdqu (%rsi,%rdx), %xmm2
289 ; CHECK-NEXT: movdqu 16(%rsi,%rdx), %xmm3
290 ; CHECK-NEXT: movdqa %xmm2, %xmm4
291 ; CHECK-NEXT: pmulhuw %xmm0, %xmm4
292 ; CHECK-NEXT: pmullw %xmm0, %xmm2
293 ; CHECK-NEXT: movdqa %xmm2, %xmm0
294 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
295 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
296 ; CHECK-NEXT: movdqa %xmm3, %xmm4
297 ; CHECK-NEXT: pmulhuw %xmm1, %xmm4
298 ; CHECK-NEXT: pmullw %xmm1, %xmm3
299 ; CHECK-NEXT: movdqa %xmm3, %xmm1
300 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
301 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
302 ; CHECK-NEXT: movdqu %xmm3, 48(%rax,%rdx,4)
303 ; CHECK-NEXT: movdqu %xmm1, 32(%rax,%rdx,4)
304 ; CHECK-NEXT: movdqu %xmm2, 16(%rax,%rdx,4)
305 ; CHECK-NEXT: movdqu %xmm0, (%rax,%rdx,4)
306 ; CHECK-NEXT: retq
333 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
334 ; CHECK-NEXT: movzwl (%rdi,%rdx), %ecx
335 ; CHECK-NEXT: movd %ecx, %xmm0
336 ; CHECK-NEXT: movzwl (%rsi,%rdx), %ecx
337 ; CHECK-NEXT: movd %ecx, %xmm1
338 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
339 ; CHECK-NEXT: psraw $8, %xmm0
340 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
341 ; CHECK-NEXT: psraw $8, %xmm1
342 ; CHECK-NEXT: pmullw %xmm0, %xmm1
343 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
344 ; CHECK-NEXT: psrad $16, %xmm0
345 ; CHECK-NEXT: movq %xmm0, (%rax,%rdx,4)
346 ; CHECK-NEXT: retq
373 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
374 ; CHECK-NEXT: movzwl (%rdi,%rdx), %ecx
375 ; CHECK-NEXT: movd %ecx, %xmm0
376 ; CHECK-NEXT: movzwl (%rsi,%rdx), %ecx
377 ; CHECK-NEXT: movd %ecx, %xmm1
378 ; CHECK-NEXT: pxor %xmm2, %xmm2
379 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
380 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
381 ; CHECK-NEXT: psraw $8, %xmm0
382 ; CHECK-NEXT: movdqa %xmm1, %xmm2
383 ; CHECK-NEXT: pmulhw %xmm0, %xmm2
384 ; CHECK-NEXT: pmullw %xmm1, %xmm0
385 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
386 ; CHECK-NEXT: movq %xmm0, (%rax,%rdx,4)
387 ; CHECK-NEXT: retq
414 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
415 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
416 ; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
417 ; CHECK-NEXT: movdqa %xmm1, %xmm2
418 ; CHECK-NEXT: pmulhw %xmm0, %xmm2
419 ; CHECK-NEXT: pmullw %xmm0, %xmm1
420 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
421 ; CHECK-NEXT: movq %xmm1, (%rax,%rdx,4)
422 ; CHECK-NEXT: retq
449 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
450 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
451 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
452 ; CHECK-NEXT: psrad $16, %xmm0
453 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
454 ; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
455 ; CHECK-NEXT: pxor %xmm2, %xmm2
456 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
457 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
458 ; CHECK-NEXT: movdqa %xmm1, %xmm2
459 ; CHECK-NEXT: pmuludq %xmm0, %xmm2
460 ; CHECK-NEXT: movdqa %xmm0, %xmm3
461 ; CHECK-NEXT: psrlq $32, %xmm3
462 ; CHECK-NEXT: pmuludq %xmm1, %xmm3
463 ; CHECK-NEXT: psllq $32, %xmm3
464 ; CHECK-NEXT: paddq %xmm2, %xmm3
465 ; CHECK-NEXT: psrlq $32, %xmm1
466 ; CHECK-NEXT: pmuludq %xmm0, %xmm1
467 ; CHECK-NEXT: psllq $32, %xmm1
468 ; CHECK-NEXT: paddq %xmm3, %xmm1
469 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
470 ; CHECK-NEXT: movq %xmm0, (%rax,%rdx,4)
471 ; CHECK-NEXT: retq
498 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
499 ; CHECK-NEXT: movdqu (%rdi,%rdx), %xmm0
500 ; CHECK-NEXT: movdqu 16(%rdi,%rdx), %xmm1
501 ; CHECK-NEXT: movdqu (%rsi,%rdx), %xmm2
502 ; CHECK-NEXT: movdqu 16(%rsi,%rdx), %xmm3
503 ; CHECK-NEXT: movdqa %xmm2, %xmm4
504 ; CHECK-NEXT: pmulhw %xmm0, %xmm4
505 ; CHECK-NEXT: pmullw %xmm0, %xmm2
506 ; CHECK-NEXT: movdqa %xmm2, %xmm0
507 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
508 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
509 ; CHECK-NEXT: movdqa %xmm3, %xmm4
510 ; CHECK-NEXT: pmulhw %xmm1, %xmm4
511 ; CHECK-NEXT: pmullw %xmm1, %xmm3
512 ; CHECK-NEXT: movdqa %xmm3, %xmm1
513 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
514 ; CHECK-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
515 ; CHECK-NEXT: movdqu %xmm3, 48(%rax,%rdx,4)
516 ; CHECK-NEXT: movdqu %xmm1, 32(%rax,%rdx,4)
517 ; CHECK-NEXT: movdqu %xmm2, 16(%rax,%rdx,4)
518 ; CHECK-NEXT: movdqu %xmm0, (%rax,%rdx,4)
519 ; CHECK-NEXT: retq
545 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
546 ; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx
547 ; CHECK-NEXT: movd %ecx, %xmm0
548 ; CHECK-NEXT: pxor %xmm1, %xmm1
549 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
550 ; CHECK-NEXT: pmullw {{.*}}(%rip), %xmm0
551 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
552 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
553 ; CHECK-NEXT: retq
575 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
576 ; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx
577 ; CHECK-NEXT: movd %ecx, %xmm0
578 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
579 ; CHECK-NEXT: psraw $8, %xmm0
580 ; CHECK-NEXT: pmullw {{.*}}(%rip), %xmm0
581 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
582 ; CHECK-NEXT: psrad $16, %xmm0
583 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
584 ; CHECK-NEXT: retq
606 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
607 ; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx
608 ; CHECK-NEXT: movd %ecx, %xmm0
609 ; CHECK-NEXT: pxor %xmm1, %xmm1
610 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
611 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u>
612 ; CHECK-NEXT: movdqa %xmm0, %xmm2
613 ; CHECK-NEXT: pmulhw %xmm1, %xmm2
614 ; CHECK-NEXT: pmullw %xmm1, %xmm0
615 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
616 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
617 ; CHECK-NEXT: retq
639 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
640 ; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx
641 ; CHECK-NEXT: movd %ecx, %xmm0
642 ; CHECK-NEXT: pxor %xmm1, %xmm1
643 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
644 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65535,255,u,u,u,u,u,u>
645 ; CHECK-NEXT: movdqa %xmm0, %xmm2
646 ; CHECK-NEXT: pmulhw %xmm1, %xmm2
647 ; CHECK-NEXT: pmullw %xmm1, %xmm0
648 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
649 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
650 ; CHECK-NEXT: retq
672 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
673 ; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx
674 ; CHECK-NEXT: movd %ecx, %xmm0
675 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
676 ; CHECK-NEXT: psraw $8, %xmm0
677 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65407,127,u,u,u,u,u,u>
678 ; CHECK-NEXT: movdqa %xmm0, %xmm2
679 ; CHECK-NEXT: pmulhw %xmm1, %xmm2
680 ; CHECK-NEXT: pmullw %xmm1, %xmm0
681 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
682 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
683 ; CHECK-NEXT: retq
705 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
706 ; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx
707 ; CHECK-NEXT: movd %ecx, %xmm0
708 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
709 ; CHECK-NEXT: psraw $8, %xmm0
710 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65408,128,u,u,u,u,u,u>
711 ; CHECK-NEXT: movdqa %xmm0, %xmm2
712 ; CHECK-NEXT: pmulhw %xmm1, %xmm2
713 ; CHECK-NEXT: pmullw %xmm1, %xmm0
714 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
715 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
716 ; CHECK-NEXT: retq
738 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
739 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
740 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <0,65535,u,u,u,u,u,u>
741 ; CHECK-NEXT: movdqa %xmm0, %xmm2
742 ; CHECK-NEXT: pmulhuw %xmm1, %xmm2
743 ; CHECK-NEXT: pmullw %xmm1, %xmm0
744 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
745 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
746 ; CHECK-NEXT: retq
768 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
769 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
770 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <32768,32767,u,u,u,u,u,u>
771 ; CHECK-NEXT: movdqa %xmm0, %xmm2
772 ; CHECK-NEXT: pmulhw %xmm1, %xmm2
773 ; CHECK-NEXT: pmullw %xmm1, %xmm0
774 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
775 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
776 ; CHECK-NEXT: retq
798 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
799 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
800 ; CHECK-NEXT: pxor %xmm1, %xmm1
801 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
802 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
803 ; CHECK-NEXT: movl $65536, %ecx # imm = 0x10000
804 ; CHECK-NEXT: movd %rcx, %xmm1
805 ; CHECK-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
806 ; CHECK-NEXT: movdqa %xmm0, %xmm2
807 ; CHECK-NEXT: pmuludq %xmm1, %xmm2
808 ; CHECK-NEXT: psrlq $32, %xmm0
809 ; CHECK-NEXT: pmuludq %xmm1, %xmm0
810 ; CHECK-NEXT: psllq $32, %xmm0
811 ; CHECK-NEXT: paddq %xmm2, %xmm0
812 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
813 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
814 ; CHECK-NEXT: retq
836 ; CHECK-NEXT: movq {{.*}}(%rip), %rax
837 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
838 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
839 ; CHECK-NEXT: psrad $16, %xmm0
840 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
841 ; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000
842 ; CHECK-NEXT: movd %rcx, %xmm1
843 ; CHECK-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
844 ; CHECK-NEXT: movdqa %xmm0, %xmm2
845 ; CHECK-NEXT: pmuludq %xmm1, %xmm2
846 ; CHECK-NEXT: psrlq $32, %xmm0
847 ; CHECK-NEXT: pmuludq %xmm1, %xmm0
848 ; CHECK-NEXT: psllq $32, %xmm0
849 ; CHECK-NEXT: paddq %xmm2, %xmm0
850 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
851 ; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
852 ; CHECK-NEXT: retq