Home | History | Annotate | Download | only in X86

Lines Matching full:sse41

4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
22 ; SSE41-LABEL: zext_16i8_to_8i16:
23 ; SSE41: # BB#0: # %entry
24 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
25 ; SSE41-NEXT: retq
55 ; SSE41-LABEL: zext_16i8_to_16i16:
56 ; SSE41: # BB#0: # %entry
57 ; SSE41-NEXT: movdqa %xmm0, %xmm1
58 ; SSE41-NEXT: pxor %xmm2, %xmm2
59 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
60 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
61 ; SSE41-NEXT: retq
100 ; SSE41-LABEL: zext_16i8_to_4i32:
101 ; SSE41: # BB#0: # %entry
102 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
103 ; SSE41-NEXT: retq
136 ; SSE41-LABEL: zext_16i8_to_8i32:
137 ; SSE41: # BB#0: # %entry
138 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
139 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
140 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
141 ; SSE41-NEXT: movdqa %xmm2, %xmm0
142 ; SSE41-NEXT: retq
181 ; SSE41-LABEL: zext_16i8_to_2i64:
182 ; SSE41: # BB#0: # %entry
183 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
184 ; SSE41-NEXT: retq
215 ; SSE41-LABEL: zext_16i8_to_4i64:
216 ; SSE41: # BB#0: # %entry
217 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
218 ; SSE41-NEXT: psrld $16, %xmm0
219 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
220 ; SSE41-NEXT: movdqa %xmm2, %xmm0
221 ; SSE41-NEXT: retq
259 ; SSE41-LABEL: zext_8i16_to_4i32:
260 ; SSE41: # BB#0: # %entry
261 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
262 ; SSE41-NEXT: retq
291 ; SSE41-LABEL: zext_8i16_to_8i32:
292 ; SSE41: # BB#0: # %entry
293 ; SSE41-NEXT: movdqa %xmm0, %xmm1
294 ; SSE41-NEXT: pxor %xmm2, %xmm2
295 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
296 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
297 ; SSE41-NEXT: retq
336 ; SSE41-LABEL: zext_8i16_to_2i64:
337 ; SSE41: # BB#0: # %entry
338 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
339 ; SSE41-NEXT: retq
372 ; SSE41-LABEL: zext_8i16_to_4i64:
373 ; SSE41: # BB#0: # %entry
374 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
375 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
376 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
377 ; SSE41-NEXT: movdqa %xmm2, %xmm0
378 ; SSE41-NEXT: retq
416 ; SSE41-LABEL: zext_4i32_to_2i64:
417 ; SSE41: # BB#0: # %entry
418 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
419 ; SSE41-NEXT: retq
448 ; SSE41-LABEL: zext_4i32_to_4i64:
449 ; SSE41: # BB#0: # %entry
450 ; SSE41-NEXT: movdqa %xmm0, %xmm1
451 ; SSE41-NEXT: pxor %xmm2, %xmm2
452 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
453 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
454 ; SSE41-NEXT: retq
496 ; SSE41-LABEL: load_zext_2i8_to_2i64:
497 ; SSE41: # BB#0: # %entry
498 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
499 ; SSE41-NEXT: retq
528 ; SSE41-LABEL: load_zext_4i8_to_4i32:
529 ; SSE41: # BB#0: # %entry
530 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
531 ; SSE41-NEXT: retq
563 ; SSE41-LABEL: load_zext_4i8_to_4i64:
564 ; SSE41: # BB#0: # %entry
565 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
566 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
567 ; SSE41-NEXT: retq
606 ; SSE41-LABEL: load_zext_8i8_to_8i16:
607 ; SSE41: # BB#0: # %entry
608 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
609 ; SSE41-NEXT: retq
642 ; SSE41-LABEL: load_zext_8i8_to_8i32:
643 ; SSE41: # BB#0: # %entry
644 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
645 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
646 ; SSE41-NEXT: retq
691 ; SSE41-LABEL: load_zext_16i8_to_8i32:
692 ; SSE41: # BB#0: # %entry
693 ; SSE41-NEXT: movdqa (%rdi), %xmm1
694 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
695 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
696 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
697 ; SSE41-NEXT: retq
756 ; SSE41-LABEL: load_zext_8i8_to_8i64:
757 ; SSE41: # BB#0: # %entry
758 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
759 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
760 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
761 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
762 ; SSE41-NEXT: retq
809 ; SSE41-LABEL: load_zext_16i8_to_16i16:
810 ; SSE41: # BB#0: # %entry
811 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
812 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
813 ; SSE41-NEXT: retq
854 ; SSE41-LABEL: load_zext_2i16_to_2i64:
855 ; SSE41: # BB#0: # %entry
856 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
857 ; SSE41-NEXT: retq
884 ; SSE41-LABEL: load_zext_4i16_to_4i32:
885 ; SSE41: # BB#0: # %entry
886 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
887 ; SSE41-NEXT: retq
920 ; SSE41-LABEL: load_zext_4i16_to_4i64:
921 ; SSE41: # BB#0: # %entry
922 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
923 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
924 ; SSE41-NEXT: retq
967 ; SSE41-LABEL: load_zext_8i16_to_8i32:
968 ; SSE41: # BB#0: # %entry
969 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
970 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
971 ; SSE41-NEXT: retq
1010 ; SSE41-LABEL: load_zext_2i32_to_2i64:
1011 ; SSE41: # BB#0: # %entry
1012 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1013 ; SSE41-NEXT: retq
1044 ; SSE41-LABEL: load_zext_4i32_to_4i64:
1045 ; SSE41: # BB#0: # %entry
1046 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1047 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1048 ; SSE41-NEXT: retq
1093 ; SSE41-LABEL: zext_8i8_to_8i32:
1094 ; SSE41: # BB#0: # %entry
1095 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1096 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
1097 ; SSE41-NEXT: pxor %xmm2, %xmm2
1098 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1099 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1100 ; SSE41-NEXT: retq
1144 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1145 ; SSE41: # BB#0: # %entry
1146 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1147 ; SSE41-NEXT: pxor %xmm2, %xmm2
1148 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1149 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1150 ; SSE41-NEXT: retq
1192 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1193 ; SSE41: # BB#0: # %entry
1194 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1195 ; SSE41-NEXT: pxor %xmm2, %xmm2
1196 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1197 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1198 ; SSE41-NEXT: retq
1247 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1248 ; SSE41: # BB#0: # %entry
1249 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1250 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1251 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1252 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1253 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1254 ; SSE41-NEXT: retq
1296 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1297 ; SSE41: # BB#0: # %entry
1298 ; SSE41-NEXT: psrlq $48, %xmm0
1299 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1300 ; SSE41-NEXT: retq
1334 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1335 ; SSE41: # BB#0: # %entry
1336 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1337 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1338 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1339 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1340 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1341 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1342 ; SSE41-NEXT: retq
1384 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1385 ; SSE41: # BB#0: # %entry
1386 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1387 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1388 ; SSE41-NEXT: retq
1422 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1423 ; SSE41: # BB#0: # %entry
1424 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1425 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1426 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1427 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1428 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1429 ; SSE41-NEXT: retq
1496 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1497 ; SSE41: # BB#0: # %entry
1498 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1499 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1500 ; SSE41-NEXT: pxor %xmm2, %xmm2
1501 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1502 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1503 ; SSE41-NEXT: retq
1548 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
1549 ; SSE41: # BB#0: # %entry
1550 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1551 ; SSE41-NEXT: pxor %xmm2, %xmm2
1552 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1553 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1554 ; SSE41-NEXT: movdqa %xmm2, %xmm1
1555 ; SSE41-NEXT: retq
1619 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
1620 ; SSE41: # BB#0: # %entry
1621 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1622 ; SSE41-NEXT: pxor %xmm0, %xmm0
1623 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1624 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1625 ; SSE41-NEXT: retq