1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW 10 11 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 12 ; SSE2-LABEL: zext_16i8_to_8i16: 13 ; SSE2: # %bb.0: # %entry 14 ; SSE2-NEXT: pxor %xmm1, %xmm1 15 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 16 ; SSE2-NEXT: retq 17 ; 18 ; SSSE3-LABEL: zext_16i8_to_8i16: 19 ; SSSE3: # %bb.0: # %entry 20 ; SSSE3-NEXT: pxor %xmm1, %xmm1 21 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 22 ; SSSE3-NEXT: retq 23 ; 24 ; SSE41-LABEL: zext_16i8_to_8i16: 25 ; SSE41: # %bb.0: # %entry 26 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 27 ; SSE41-NEXT: retq 28 ; 29 ; AVX-LABEL: zext_16i8_to_8i16: 30 ; AVX: # %bb.0: # %entry 31 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 32 ; AVX-NEXT: retq 33 entry: 34 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 35 %C = zext <8 x i8> %B to <8 x i16> 36 ret <8 x i16> %C 37 } 38 39 ; PR17654 40 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) { 41 ; SSE2-LABEL: zext_16i8_to_16i16: 42 ; SSE2: # %bb.0: # %entry 43 ; SSE2-NEXT: movdqa %xmm0, %xmm1 44 ; SSE2-NEXT: pxor %xmm2, %xmm2 45 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 46 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 47 ; SSE2-NEXT: retq 48 ; 49 ; SSSE3-LABEL: zext_16i8_to_16i16: 50 ; SSSE3: # %bb.0: # %entry 51 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 52 ; SSSE3-NEXT: pxor %xmm2, %xmm2 53 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 54 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 55 ; SSSE3-NEXT: retq 56 ; 57 ; SSE41-LABEL: zext_16i8_to_16i16: 58 ; SSE41: # %bb.0: # %entry 59 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 60 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 61 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 62 ; SSE41-NEXT: movdqa %xmm2, %xmm0 63 ; SSE41-NEXT: retq 64 ; 65 ; AVX1-LABEL: zext_16i8_to_16i16: 66 ; AVX1: # %bb.0: # %entry 67 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 68 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 69 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 70 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 71 ; AVX1-NEXT: retq 72 ; 73 ; AVX2-LABEL: zext_16i8_to_16i16: 74 ; AVX2: # %bb.0: # %entry 75 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 76 ; AVX2-NEXT: retq 77 ; 78 ; AVX512-LABEL: zext_16i8_to_16i16: 79 ; AVX512: # %bb.0: # %entry 80 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 81 ; AVX512-NEXT: retq 82 entry: 83 %B = zext <16 x i8> %A to <16 x i16> 84 ret <16 x i16> %B 85 } 86 87 define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) { 88 ; SSE2-LABEL: zext_32i8_to_32i16: 89 ; SSE2: # %bb.0: # %entry 90 ; SSE2-NEXT: movdqa %xmm1, %xmm3 91 ; SSE2-NEXT: movdqa %xmm0, %xmm1 92 ; SSE2-NEXT: pxor %xmm4, %xmm4 93 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 94 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 95 ; SSE2-NEXT: movdqa %xmm3, %xmm2 96 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 97 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 98 ; SSE2-NEXT: retq 99 ; 100 ; SSSE3-LABEL: zext_32i8_to_32i16: 101 ; SSSE3: # %bb.0: # %entry 102 ; SSSE3-NEXT: movdqa %xmm1, %xmm3 103 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 104 ; SSSE3-NEXT: pxor %xmm4, %xmm4 105 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 106 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 107 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 108 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 109 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 110 ; SSSE3-NEXT: retq 111 ; 112 ; SSE41-LABEL: zext_32i8_to_32i16: 113 ; SSE41: # %bb.0: # %entry 114 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 115 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 116 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 117 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 118 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 119 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 120 ; SSE41-NEXT: movdqa %xmm5, %xmm0 121 ; SSE41-NEXT: movdqa %xmm4, %xmm1 122 ; SSE41-NEXT: retq 123 ; 124 ; AVX1-LABEL: zext_32i8_to_32i16: 125 ; AVX1: # %bb.0: # %entry 126 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 127 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 128 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero 129 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 131 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 132 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 133 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 134 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 135 ; AVX1-NEXT: vmovaps %ymm2, %ymm0 136 ; AVX1-NEXT: retq 137 ; 138 ; AVX2-LABEL: zext_32i8_to_32i16: 139 ; AVX2: # %bb.0: # %entry 140 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 141 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 142 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 143 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0 144 ; AVX2-NEXT: retq 145 ; 146 ; AVX512F-LABEL: zext_32i8_to_32i16: 147 ; AVX512F: # %bb.0: # %entry 148 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 149 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 150 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 151 ; AVX512F-NEXT: vmovdqa %ymm2, %ymm0 152 ; AVX512F-NEXT: retq 153 ; 154 ; AVX512BW-LABEL: zext_32i8_to_32i16: 155 ; AVX512BW: # %bb.0: # %entry 156 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 157 ; AVX512BW-NEXT: retq 158 entry: 159 %B = zext <32 x i8> %A to <32 x i16> 160 ret <32 x i16> %B 161 } 162 163 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 164 ; SSE2-LABEL: zext_16i8_to_4i32: 165 ; SSE2: # %bb.0: # %entry 166 ; SSE2-NEXT: pxor %xmm1, %xmm1 167 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 168 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 169 ; SSE2-NEXT: retq 170 ; 171 ; SSSE3-LABEL: zext_16i8_to_4i32: 172 ; SSSE3: # %bb.0: # %entry 173 ; SSSE3-NEXT: pxor %xmm1, %xmm1 174 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 175 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 176 ; SSSE3-NEXT: retq 177 ; 178 ; SSE41-LABEL: zext_16i8_to_4i32: 179 ; SSE41: # %bb.0: # %entry 180 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 181 ; SSE41-NEXT: retq 182 ; 183 ; AVX-LABEL: zext_16i8_to_4i32: 184 ; AVX: # %bb.0: # %entry 185 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 186 ; AVX-NEXT: retq 187 entry: 188 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 189 %C = zext <4 x i8> %B to <4 x i32> 190 ret <4 x i32> %C 191 } 192 193 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 194 ; SSE2-LABEL: zext_16i8_to_8i32: 195 ; SSE2: # %bb.0: # %entry 196 ; SSE2-NEXT: movdqa %xmm0, %xmm1 197 ; SSE2-NEXT: pxor %xmm2, %xmm2 198 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 199 ; SSE2-NEXT: movdqa %xmm1, %xmm0 200 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 201 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 202 ; SSE2-NEXT: retq 203 ; 204 ; SSSE3-LABEL: 
zext_16i8_to_8i32: 205 ; SSSE3: # %bb.0: # %entry 206 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 207 ; SSSE3-NEXT: pxor %xmm2, %xmm2 208 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 209 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 210 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 211 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 212 ; SSSE3-NEXT: retq 213 ; 214 ; SSE41-LABEL: zext_16i8_to_8i32: 215 ; SSE41: # %bb.0: # %entry 216 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 217 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 218 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 219 ; SSE41-NEXT: movdqa %xmm2, %xmm0 220 ; SSE41-NEXT: retq 221 ; 222 ; AVX1-LABEL: zext_16i8_to_8i32: 223 ; AVX1: # %bb.0: # %entry 224 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 225 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 226 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 227 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 228 ; AVX1-NEXT: retq 229 ; 230 ; AVX2-LABEL: zext_16i8_to_8i32: 231 ; AVX2: # %bb.0: # %entry 232 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 233 ; AVX2-NEXT: retq 234 ; 235 ; AVX512-LABEL: zext_16i8_to_8i32: 236 ; AVX512: # %bb.0: # %entry 237 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 238 ; AVX512-NEXT: retq 239 entry: 240 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 241 %C = zext <8 x i8> %B to <8 x i32> 242 ret <8 x i32> %C 243 } 244 245 define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 246 ; SSE2-LABEL: zext_16i8_to_16i32: 247 ; SSE2: # %bb.0: # %entry 248 ; SSE2-NEXT: movdqa %xmm0, %xmm3 249 ; SSE2-NEXT: pxor %xmm4, %xmm4 250 ; SSE2-NEXT: movdqa %xmm0, %xmm1 251 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 252 ; SSE2-NEXT: movdqa %xmm1, %xmm0 253 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 254 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 255 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 256 ; SSE2-NEXT: movdqa %xmm3, %xmm2 257 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 258 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 259 ; SSE2-NEXT: retq 260 ; 261 ; SSSE3-LABEL: zext_16i8_to_16i32: 262 ; SSSE3: # %bb.0: # %entry 263 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 264 ; SSSE3-NEXT: pxor %xmm4, %xmm4 265 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 266 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 267 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 268 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 269 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 270 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 271 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 272 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 273 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 274 ; SSSE3-NEXT: retq 275 ; 276 ; SSE41-LABEL: zext_16i8_to_16i32: 277 ; SSE41: # %bb.0: # %entry 278 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 279 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 280 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 281 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 282 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 283 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 284 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 285 ; SSE41-NEXT: movdqa %xmm4, %xmm0 286 ; SSE41-NEXT: retq 287 ; 288 ; AVX1-LABEL: zext_16i8_to_16i32: 289 ; AVX1: # %bb.0: # %entry 290 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 292 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 293 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 294 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 295 ; AVX1-NEXT: 
vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 296 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 297 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 299 ; AVX1-NEXT: vmovaps %ymm2, %ymm0 300 ; AVX1-NEXT: retq 301 ; 302 ; AVX2-LABEL: zext_16i8_to_16i32: 303 ; AVX2: # %bb.0: # %entry 304 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 305 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 306 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 307 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0 308 ; AVX2-NEXT: retq 309 ; 310 ; AVX512-LABEL: zext_16i8_to_16i32: 311 ; AVX512: # %bb.0: # %entry 312 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 313 ; AVX512-NEXT: retq 314 entry: 315 %B = zext <16 x i8> %A to <16 x i32> 316 ret <16 x i32> %B 317 } 318 319 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 320 ; SSE2-LABEL: zext_16i8_to_2i64: 321 ; SSE2: # %bb.0: # %entry 322 ; SSE2-NEXT: pxor %xmm1, %xmm1 323 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 324 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 325 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 326 ; SSE2-NEXT: retq 327 ; 328 ; SSSE3-LABEL: zext_16i8_to_2i64: 329 ; SSSE3: # %bb.0: # %entry 330 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 331 ; SSSE3-NEXT: retq 332 ; 333 ; SSE41-LABEL: zext_16i8_to_2i64: 334 ; SSE41: # %bb.0: # %entry 335 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 336 ; SSE41-NEXT: retq 337 ; 338 ; AVX-LABEL: zext_16i8_to_2i64: 339 ; AVX: # %bb.0: # %entry 340 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 341 ; AVX-NEXT: retq 342 entry: 343 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 344 %C = zext <2 x i8> %B to <2 x i64> 345 ret <2 x i64> %C 346 } 347 348 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 349 ; SSE2-LABEL: zext_16i8_to_4i64: 350 ; SSE2: # %bb.0: # %entry 351 ; SSE2-NEXT: movdqa %xmm0, %xmm1 352 ; SSE2-NEXT: pxor %xmm2, %xmm2 353 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 354 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 355 ; SSE2-NEXT: movdqa %xmm1, %xmm0 356 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 357 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 358 ; SSE2-NEXT: retq 359 ; 360 ; SSSE3-LABEL: zext_16i8_to_4i64: 361 ; SSSE3: # %bb.0: # %entry 362 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 363 ; 
SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 364 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 365 ; SSSE3-NEXT: retq 366 ; 367 ; SSE41-LABEL: zext_16i8_to_4i64: 368 ; SSE41: # %bb.0: # %entry 369 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 370 ; SSE41-NEXT: psrld $16, %xmm0 371 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 372 ; SSE41-NEXT: movdqa %xmm2, %xmm0 373 ; SSE41-NEXT: retq 374 ; 375 ; AVX1-LABEL: zext_16i8_to_4i64: 376 ; AVX1: # %bb.0: # %entry 377 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 378 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 379 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 380 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 381 ; AVX1-NEXT: retq 382 ; 383 ; AVX2-LABEL: zext_16i8_to_4i64: 384 ; AVX2: # %bb.0: # %entry 385 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 386 ; AVX2-NEXT: retq 387 ; 388 ; AVX512-LABEL: zext_16i8_to_4i64: 389 ; AVX512: # %bb.0: # %entry 390 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 391 ; AVX512-NEXT: retq 392 entry: 393 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 394 %C = zext <4 x i8> %B to <4 x i64> 395 ret <4 x i64> %C 396 } 397 398 define <8 x i64> @zext_16i8_to_8i64(<16 x i8> 
%A) nounwind uwtable readnone ssp { 399 ; SSE2-LABEL: zext_16i8_to_8i64: 400 ; SSE2: # %bb.0: # %entry 401 ; SSE2-NEXT: movdqa %xmm0, %xmm1 402 ; SSE2-NEXT: pxor %xmm4, %xmm4 403 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] 404 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 405 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 406 ; SSE2-NEXT: movdqa %xmm1, %xmm0 407 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 408 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 409 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 410 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] 411 ; SSE2-NEXT: movdqa %xmm3, %xmm2 412 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 413 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 414 ; SSE2-NEXT: retq 415 ; 416 ; SSSE3-LABEL: zext_16i8_to_8i64: 417 ; SSSE3: # %bb.0: # %entry 418 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 419 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128] 420 ; SSSE3-NEXT: pshufb %xmm4, %xmm0 421 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128] 422 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 423 ; SSSE3-NEXT: pshufb %xmm5, %xmm1 424 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 425 ; SSSE3-NEXT: pshufb %xmm4, %xmm2 426 ; SSSE3-NEXT: pshufb %xmm5, %xmm3 427 ; SSSE3-NEXT: retq 428 ; 429 ; SSE41-LABEL: zext_16i8_to_8i64: 430 ; SSE41: # %bb.0: # %entry 431 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 432 ; SSE41-NEXT: movdqa 
%xmm0, %xmm1 433 ; SSE41-NEXT: psrld $16, %xmm1 434 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 435 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 436 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero 437 ; SSE41-NEXT: psrlq $48, %xmm0 438 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 439 ; SSE41-NEXT: movdqa %xmm4, %xmm0 440 ; SSE41-NEXT: retq 441 ; 442 ; AVX1-LABEL: zext_16i8_to_8i64: 443 ; AVX1: # %bb.0: # %entry 444 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 445 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 446 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero 447 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 448 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 449 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 450 ; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 451 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 453 ; AVX1-NEXT: vmovaps %ymm2, %ymm0 454 ; AVX1-NEXT: retq 455 ; 456 ; AVX2-LABEL: zext_16i8_to_8i64: 457 ; AVX2: # %bb.0: # %entry 458 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 459 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 460 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 461 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0 462 ; AVX2-NEXT: retq 463 ; 464 ; AVX512-LABEL: zext_16i8_to_8i64: 465 ; AVX512: # %bb.0: # %entry 466 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 467 ; AVX512-NEXT: retq 468 entry: 469 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 470 %C = zext <8 x i8> %B to <8 x i64> 471 ret <8 x i64> %C 472 } 473 474 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 475 ; SSE2-LABEL: zext_8i16_to_4i32: 476 ; SSE2: # %bb.0: # %entry 477 ; SSE2-NEXT: pxor %xmm1, %xmm1 478 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 479 ; SSE2-NEXT: retq 480 ; 481 ; SSSE3-LABEL: zext_8i16_to_4i32: 482 ; SSSE3: # %bb.0: # %entry 483 ; SSSE3-NEXT: pxor %xmm1, %xmm1 484 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 485 ; SSSE3-NEXT: retq 486 ; 487 ; SSE41-LABEL: zext_8i16_to_4i32: 488 ; SSE41: # %bb.0: # %entry 489 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 490 ; SSE41-NEXT: retq 491 ; 492 ; AVX-LABEL: zext_8i16_to_4i32: 493 ; AVX: # %bb.0: # %entry 494 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 495 ; AVX-NEXT: retq 496 entry: 497 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 498 %C = zext <4 x i16> %B to <4 
x i32> 499 ret <4 x i32> %C 500 } 501 502 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 503 ; SSE2-LABEL: zext_8i16_to_8i32: 504 ; SSE2: # %bb.0: # %entry 505 ; SSE2-NEXT: movdqa %xmm0, %xmm1 506 ; SSE2-NEXT: pxor %xmm2, %xmm2 507 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 508 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 509 ; SSE2-NEXT: retq 510 ; 511 ; SSSE3-LABEL: zext_8i16_to_8i32: 512 ; SSSE3: # %bb.0: # %entry 513 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 514 ; SSSE3-NEXT: pxor %xmm2, %xmm2 515 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 516 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 517 ; SSSE3-NEXT: retq 518 ; 519 ; SSE41-LABEL: zext_8i16_to_8i32: 520 ; SSE41: # %bb.0: # %entry 521 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 522 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 523 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 524 ; SSE41-NEXT: movdqa %xmm2, %xmm0 525 ; SSE41-NEXT: retq 526 ; 527 ; AVX1-LABEL: zext_8i16_to_8i32: 528 ; AVX1: # %bb.0: # %entry 529 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 530 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 531 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 532 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 533 ; AVX1-NEXT: retq 534 ; 535 ; AVX2-LABEL: zext_8i16_to_8i32: 536 ; AVX2: # %bb.0: # %entry 537 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 538 ; AVX2-NEXT: retq 539 ; 540 ; AVX512-LABEL: zext_8i16_to_8i32: 541 ; AVX512: # %bb.0: # %entry 542 ; AVX512-NEXT: 
vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 543 ; AVX512-NEXT: retq 544 entry: 545 %B = zext <8 x i16> %A to <8 x i32> 546 ret <8 x i32>%B 547 } 548 549 define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 550 ; SSE2-LABEL: zext_16i16_to_16i32: 551 ; SSE2: # %bb.0: # %entry 552 ; SSE2-NEXT: movdqa %xmm1, %xmm3 553 ; SSE2-NEXT: movdqa %xmm0, %xmm1 554 ; SSE2-NEXT: pxor %xmm4, %xmm4 555 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 556 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 557 ; SSE2-NEXT: movdqa %xmm3, %xmm2 558 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 559 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 560 ; SSE2-NEXT: retq 561 ; 562 ; SSSE3-LABEL: zext_16i16_to_16i32: 563 ; SSSE3: # %bb.0: # %entry 564 ; SSSE3-NEXT: movdqa %xmm1, %xmm3 565 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 566 ; SSSE3-NEXT: pxor %xmm4, %xmm4 567 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 568 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 569 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 570 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 571 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 572 ; SSSE3-NEXT: retq 573 ; 574 ; SSE41-LABEL: zext_16i16_to_16i32: 575 ; SSE41: # %bb.0: # %entry 576 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 577 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 578 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 579 ; 
SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 580 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 581 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 582 ; SSE41-NEXT: movdqa %xmm5, %xmm0 583 ; SSE41-NEXT: movdqa %xmm4, %xmm1 584 ; SSE41-NEXT: retq 585 ; 586 ; AVX1-LABEL: zext_16i16_to_16i32: 587 ; AVX1: # %bb.0: # %entry 588 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 589 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 590 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero 591 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 592 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 593 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 594 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 595 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 596 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 597 ; AVX1-NEXT: vmovaps %ymm2, %ymm0 598 ; AVX1-NEXT: retq 599 ; 600 ; AVX2-LABEL: zext_16i16_to_16i32: 601 ; AVX2: # %bb.0: # %entry 602 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 603 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 604 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 605 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0 606 ; AVX2-NEXT: retq 607 ; 608 ; AVX512-LABEL: zext_16i16_to_16i32: 609 ; AVX512: # %bb.0: # %entry 610 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 611 ; AVX512-NEXT: retq 612 entry: 613 %B = zext <16 x i16> %A to <16 
x i32> 614 ret <16 x i32> %B 615 } 616 617 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 618 ; SSE2-LABEL: zext_8i16_to_2i64: 619 ; SSE2: # %bb.0: # %entry 620 ; SSE2-NEXT: pxor %xmm1, %xmm1 621 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 622 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 623 ; SSE2-NEXT: retq 624 ; 625 ; SSSE3-LABEL: zext_8i16_to_2i64: 626 ; SSSE3: # %bb.0: # %entry 627 ; SSSE3-NEXT: pxor %xmm1, %xmm1 628 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 629 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 630 ; SSSE3-NEXT: retq 631 ; 632 ; SSE41-LABEL: zext_8i16_to_2i64: 633 ; SSE41: # %bb.0: # %entry 634 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 635 ; SSE41-NEXT: retq 636 ; 637 ; AVX-LABEL: zext_8i16_to_2i64: 638 ; AVX: # %bb.0: # %entry 639 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 640 ; AVX-NEXT: retq 641 entry: 642 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 643 %C = zext <2 x i16> %B to <2 x i64> 644 ret <2 x i64> %C 645 } 646 647 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 648 ; SSE2-LABEL: zext_8i16_to_4i64: 649 ; SSE2: # %bb.0: # %entry 650 ; SSE2-NEXT: movdqa %xmm0, %xmm1 651 ; SSE2-NEXT: pxor %xmm2, %xmm2 652 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 653 ; SSE2-NEXT: movdqa %xmm1, %xmm0 654 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 655 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 656 ; SSE2-NEXT: retq 657 ; 658 ; SSSE3-LABEL: zext_8i16_to_4i64: 659 ; SSSE3: # %bb.0: # %entry 660 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 661 ; SSSE3-NEXT: pxor %xmm2, %xmm2 662 ; SSSE3-NEXT: punpcklwd 
{{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 663 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 664 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 665 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 666 ; SSSE3-NEXT: retq 667 ; 668 ; SSE41-LABEL: zext_8i16_to_4i64: 669 ; SSE41: # %bb.0: # %entry 670 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 671 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 672 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 673 ; SSE41-NEXT: movdqa %xmm2, %xmm0 674 ; SSE41-NEXT: retq 675 ; 676 ; AVX1-LABEL: zext_8i16_to_4i64: 677 ; AVX1: # %bb.0: # %entry 678 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 679 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 680 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 681 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 682 ; AVX1-NEXT: retq 683 ; 684 ; AVX2-LABEL: zext_8i16_to_4i64: 685 ; AVX2: # %bb.0: # %entry 686 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 687 ; AVX2-NEXT: retq 688 ; 689 ; AVX512-LABEL: zext_8i16_to_4i64: 690 ; AVX512: # %bb.0: # %entry 691 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 692 ; AVX512-NEXT: retq 693 entry: 694 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 695 %C = zext <4 x i16> %B to <4 x i64> 696 ret <4 x i64> %C 697 } 698 699 define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 700 ; SSE2-LABEL: zext_8i16_to_8i64: 701 ; SSE2: # %bb.0: # %entry 702 ; SSE2-NEXT: movdqa %xmm0, %xmm3 703 ; SSE2-NEXT: pxor %xmm4, %xmm4 704 ; SSE2-NEXT: movdqa %xmm0, %xmm1 705 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = 
xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 706 ; SSE2-NEXT: movdqa %xmm1, %xmm0 707 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 708 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 709 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 710 ; SSE2-NEXT: movdqa %xmm3, %xmm2 711 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 712 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 713 ; SSE2-NEXT: retq 714 ; 715 ; SSSE3-LABEL: zext_8i16_to_8i64: 716 ; SSSE3: # %bb.0: # %entry 717 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 718 ; SSSE3-NEXT: pxor %xmm4, %xmm4 719 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 720 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 721 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 722 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 723 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 724 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 725 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 726 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 727 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 728 ; SSSE3-NEXT: retq 729 ; 730 ; SSE41-LABEL: zext_8i16_to_8i64: 731 ; SSE41: # %bb.0: # %entry 732 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 733 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 734 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 735 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 736 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 737 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 738 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 739 ; SSE41-NEXT: movdqa 
%xmm4, %xmm0 740 ; SSE41-NEXT: retq 741 ; 742 ; AVX1-LABEL: zext_8i16_to_8i64: 743 ; AVX1: # %bb.0: # %entry 744 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 745 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 746 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 747 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 748 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 749 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 750 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 751 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 752 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 753 ; AVX1-NEXT: vmovaps %ymm2, %ymm0 754 ; AVX1-NEXT: retq 755 ; 756 ; AVX2-LABEL: zext_8i16_to_8i64: 757 ; AVX2: # %bb.0: # %entry 758 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 759 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 760 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 761 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0 762 ; AVX2-NEXT: retq 763 ; 764 ; AVX512-LABEL: zext_8i16_to_8i64: 765 ; AVX512: # %bb.0: # %entry 766 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 767 ; AVX512-NEXT: retq 768 entry: 769 %B = zext <8 x i16> %A to <8 x i64> 770 ret <8 x i64> %B 771 } 772 773 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 774 ; SSE2-LABEL: zext_4i32_to_2i64: 775 ; SSE2: # %bb.0: # %entry 776 ; SSE2-NEXT: xorps %xmm1, %xmm1 777 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 778 ; SSE2-NEXT: retq 779 ; 780 ; SSSE3-LABEL: zext_4i32_to_2i64: 781 
; SSSE3: # %bb.0: # %entry 782 ; SSSE3-NEXT: xorps %xmm1, %xmm1 783 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 784 ; SSSE3-NEXT: retq 785 ; 786 ; SSE41-LABEL: zext_4i32_to_2i64: 787 ; SSE41: # %bb.0: # %entry 788 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 789 ; SSE41-NEXT: retq 790 ; 791 ; AVX-LABEL: zext_4i32_to_2i64: 792 ; AVX: # %bb.0: # %entry 793 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 794 ; AVX-NEXT: retq 795 entry: 796 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 797 %C = zext <2 x i32> %B to <2 x i64> 798 ret <2 x i64> %C 799 } 800 801 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 802 ; SSE2-LABEL: zext_4i32_to_4i64: 803 ; SSE2: # %bb.0: # %entry 804 ; SSE2-NEXT: movaps %xmm0, %xmm1 805 ; SSE2-NEXT: xorps %xmm2, %xmm2 806 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 807 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 808 ; SSE2-NEXT: retq 809 ; 810 ; SSSE3-LABEL: zext_4i32_to_4i64: 811 ; SSSE3: # %bb.0: # %entry 812 ; SSSE3-NEXT: movaps %xmm0, %xmm1 813 ; SSSE3-NEXT: xorps %xmm2, %xmm2 814 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 815 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 816 ; SSSE3-NEXT: retq 817 ; 818 ; SSE41-LABEL: zext_4i32_to_4i64: 819 ; SSE41: # %bb.0: # %entry 820 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero 821 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 822 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero 823 ; SSE41-NEXT: movdqa %xmm2, %xmm0 824 ; SSE41-NEXT: retq 825 ; 826 ; AVX1-LABEL: zext_4i32_to_4i64: 827 ; AVX1: # %bb.0: # %entry 828 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero 829 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 830 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 831 ; AVX1-NEXT: vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 832 ; AVX1-NEXT: retq 833 ; 834 ; AVX2-LABEL: zext_4i32_to_4i64: 835 ; AVX2: # %bb.0: # %entry 836 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 837 ; AVX2-NEXT: retq 838 ; 839 ; AVX512-LABEL: zext_4i32_to_4i64: 840 ; AVX512: # %bb.0: # %entry 841 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 842 ; AVX512-NEXT: retq 843 entry: 844 %B = zext <4 x i32> %A to <4 x i64> 845 ret <4 x i64>%B 846 } 847 848 define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 849 ; SSE2-LABEL: zext_8i32_to_8i64: 850 ; SSE2: # %bb.0: # %entry 851 ; SSE2-NEXT: movaps %xmm1, %xmm3 852 ; SSE2-NEXT: movaps %xmm0, %xmm1 853 ; SSE2-NEXT: xorps %xmm4, %xmm4 854 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 855 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 856 ; SSE2-NEXT: movaps %xmm3, %xmm2 857 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 858 ; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 859 ; SSE2-NEXT: retq 860 ; 861 ; SSSE3-LABEL: zext_8i32_to_8i64: 862 ; SSSE3: # %bb.0: # %entry 863 ; SSSE3-NEXT: movaps %xmm1, %xmm3 864 ; SSSE3-NEXT: movaps %xmm0, %xmm1 865 ; SSSE3-NEXT: xorps %xmm4, %xmm4 866 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 867 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 868 ; SSSE3-NEXT: movaps %xmm3, %xmm2 869 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 870 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 871 ; SSSE3-NEXT: retq 872 ; 873 ; SSE41-LABEL: zext_8i32_to_8i64: 874 ; SSE41: # %bb.0: # %entry 875 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero 876 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero 877 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 878 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = 
xmm0[0],zero,xmm0[1],zero 879 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 880 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero 881 ; SSE41-NEXT: movdqa %xmm5, %xmm0 882 ; SSE41-NEXT: movdqa %xmm4, %xmm1 883 ; SSE41-NEXT: retq 884 ; 885 ; AVX1-LABEL: zext_8i32_to_8i64: 886 ; AVX1: # %bb.0: # %entry 887 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero 888 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 889 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero 890 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 891 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 892 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero 893 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 894 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 895 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 896 ; AVX1-NEXT: vmovaps %ymm2, %ymm0 897 ; AVX1-NEXT: retq 898 ; 899 ; AVX2-LABEL: zext_8i32_to_8i64: 900 ; AVX2: # %bb.0: # %entry 901 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 902 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 903 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 904 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0 905 ; AVX2-NEXT: retq 906 ; 907 ; AVX512-LABEL: zext_8i32_to_8i64: 908 ; AVX512: # %bb.0: # %entry 909 ; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 910 ; AVX512-NEXT: retq 911 entry: 912 %B = zext <8 x i32> %A to <8 x i64> 913 ret <8 x i64>%B 914 } 915 916 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) { 917 ; SSE2-LABEL: load_zext_2i8_to_2i64: 918 ; SSE2: # %bb.0: # %entry 919 ; SSE2-NEXT: movzwl (%rdi), %eax 920 ; SSE2-NEXT: movd %eax, %xmm0 921 ; SSE2-NEXT: pxor %xmm1, %xmm1 922 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 923 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 924 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 925 ; SSE2-NEXT: retq 926 ; 927 ; SSSE3-LABEL: load_zext_2i8_to_2i64: 928 ; SSSE3: # %bb.0: # %entry 929 ; SSSE3-NEXT: movzwl (%rdi), %eax 930 ; SSSE3-NEXT: movd %eax, %xmm0 931 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 932 ; SSSE3-NEXT: retq 933 ; 934 ; SSE41-LABEL: load_zext_2i8_to_2i64: 935 ; SSE41: # %bb.0: # %entry 936 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 937 ; SSE41-NEXT: retq 938 ; 939 ; AVX-LABEL: load_zext_2i8_to_2i64: 940 ; AVX: # %bb.0: # %entry 941 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 942 ; AVX-NEXT: retq 943 entry: 944 %X = load <2 x i8>, <2 x i8>* %ptr 945 %Y = zext <2 x i8> %X to <2 x i64> 946 ret <2 x i64> %Y 947 } 948 949 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) { 950 ; SSE2-LABEL: load_zext_4i8_to_4i32: 951 ; SSE2: # %bb.0: # %entry 952 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 953 ; SSE2-NEXT: pxor %xmm1, %xmm1 954 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 955 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 956 ; SSE2-NEXT: retq 957 ; 958 ; SSSE3-LABEL: load_zext_4i8_to_4i32: 959 ; SSSE3: # %bb.0: # %entry 960 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 961 ; SSSE3-NEXT: pxor %xmm1, %xmm1 962 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 963 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 964 ; SSSE3-NEXT: retq 965 ; 966 ; SSE41-LABEL: load_zext_4i8_to_4i32: 967 ; SSE41: # %bb.0: # %entry 968 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 969 ; SSE41-NEXT: retq 970 ; 971 ; AVX-LABEL: load_zext_4i8_to_4i32: 972 ; AVX: # %bb.0: # %entry 973 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 974 ; AVX-NEXT: retq 975 entry: 976 %X = load <4 x i8>, <4 x i8>* %ptr 977 %Y = zext <4 x i8> %X to <4 x i32> 978 ret <4 x i32> %Y 979 } 980 981 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) { 982 ; SSE2-LABEL: load_zext_4i8_to_4i64: 983 ; SSE2: # %bb.0: # %entry 984 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 985 ; SSE2-NEXT: pxor %xmm2, %xmm2 986 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 987 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 988 ; SSE2-NEXT: movdqa %xmm1, %xmm0 989 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 990 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 991 ; SSE2-NEXT: retq 992 ; 993 ; SSSE3-LABEL: load_zext_4i8_to_4i64: 994 ; SSSE3: # %bb.0: # %entry 995 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 996 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 997 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 998 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 999 ; 
SSSE3-NEXT: retq 1000 ; 1001 ; SSE41-LABEL: load_zext_4i8_to_4i64: 1002 ; SSE41: # %bb.0: # %entry 1003 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1004 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1005 ; SSE41-NEXT: retq 1006 ; 1007 ; AVX1-LABEL: load_zext_4i8_to_4i64: 1008 ; AVX1: # %bb.0: # %entry 1009 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1010 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1011 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1012 ; AVX1-NEXT: retq 1013 ; 1014 ; AVX2-LABEL: load_zext_4i8_to_4i64: 1015 ; AVX2: # %bb.0: # %entry 1016 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1017 ; AVX2-NEXT: retq 1018 ; 1019 ; AVX512-LABEL: load_zext_4i8_to_4i64: 1020 ; AVX512: # %bb.0: # %entry 1021 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1022 ; AVX512-NEXT: retq 1023 entry: 1024 %X = load <4 x i8>, <4 x i8>* %ptr 1025 %Y = zext <4 x i8> %X to <4 x i64> 1026 ret <4 x i64> %Y 1027 } 1028 1029 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) { 1030 ; SSE2-LABEL: load_zext_8i8_to_8i16: 1031 ; SSE2: # %bb.0: # %entry 1032 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1033 ; SSE2-NEXT: pxor %xmm1, %xmm1 1034 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1035 ; SSE2-NEXT: retq 1036 ; 1037 ; 
SSSE3-LABEL: load_zext_8i8_to_8i16: 1038 ; SSSE3: # %bb.0: # %entry 1039 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1040 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1041 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1042 ; SSSE3-NEXT: retq 1043 ; 1044 ; SSE41-LABEL: load_zext_8i8_to_8i16: 1045 ; SSE41: # %bb.0: # %entry 1046 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1047 ; SSE41-NEXT: retq 1048 ; 1049 ; AVX-LABEL: load_zext_8i8_to_8i16: 1050 ; AVX: # %bb.0: # %entry 1051 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1052 ; AVX-NEXT: retq 1053 entry: 1054 %X = load <8 x i8>, <8 x i8>* %ptr 1055 %Y = zext <8 x i8> %X to <8 x i16> 1056 ret <8 x i16> %Y 1057 } 1058 1059 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) { 1060 ; SSE2-LABEL: load_zext_8i8_to_8i32: 1061 ; SSE2: # %bb.0: # %entry 1062 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1063 ; SSE2-NEXT: pxor %xmm2, %xmm2 1064 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1065 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1066 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1067 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1068 ; SSE2-NEXT: retq 1069 ; 1070 ; SSSE3-LABEL: load_zext_8i8_to_8i32: 1071 ; SSSE3: # %bb.0: # %entry 1072 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1073 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1074 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1075 ; SSSE3-NEXT: 
movdqa %xmm1, %xmm0 1076 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1077 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1078 ; SSSE3-NEXT: retq 1079 ; 1080 ; SSE41-LABEL: load_zext_8i8_to_8i32: 1081 ; SSE41: # %bb.0: # %entry 1082 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1083 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1084 ; SSE41-NEXT: retq 1085 ; 1086 ; AVX1-LABEL: load_zext_8i8_to_8i32: 1087 ; AVX1: # %bb.0: # %entry 1088 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1089 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1090 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1091 ; AVX1-NEXT: retq 1092 ; 1093 ; AVX2-LABEL: load_zext_8i8_to_8i32: 1094 ; AVX2: # %bb.0: # %entry 1095 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1096 ; AVX2-NEXT: retq 1097 ; 1098 ; AVX512-LABEL: load_zext_8i8_to_8i32: 1099 ; AVX512: # %bb.0: # %entry 1100 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1101 ; AVX512-NEXT: retq 1102 entry: 1103 %X = load <8 x i8>, <8 x i8>* %ptr 1104 %Y = zext <8 x i8> %X to <8 x i32> 1105 ret <8 x i32> %Y 1106 } 1107 1108 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) { 1109 ; SSE2-LABEL: load_zext_16i8_to_8i32: 1110 ; SSE2: # %bb.0: # %entry 1111 ; SSE2-NEXT: movdqa 
(%rdi), %xmm1 1112 ; SSE2-NEXT: pxor %xmm2, %xmm2 1113 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1114 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1115 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1116 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1117 ; SSE2-NEXT: retq 1118 ; 1119 ; SSSE3-LABEL: load_zext_16i8_to_8i32: 1120 ; SSSE3: # %bb.0: # %entry 1121 ; SSSE3-NEXT: movdqa (%rdi), %xmm1 1122 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1123 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1124 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1125 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1126 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1127 ; SSSE3-NEXT: retq 1128 ; 1129 ; SSE41-LABEL: load_zext_16i8_to_8i32: 1130 ; SSE41: # %bb.0: # %entry 1131 ; SSE41-NEXT: movdqa (%rdi), %xmm1 1132 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1133 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] 1134 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1135 ; SSE41-NEXT: retq 1136 ; 1137 ; AVX1-LABEL: load_zext_16i8_to_8i32: 1138 ; AVX1: # %bb.0: # %entry 1139 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1140 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1141 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1142 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1143 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1144 ; AVX1-NEXT: retq 1145 ; 1146 ; AVX2-LABEL: load_zext_16i8_to_8i32: 1147 ; AVX2: # %bb.0: # %entry 1148 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1149 ; AVX2-NEXT: retq 1150 ; 1151 ; AVX512-LABEL: load_zext_16i8_to_8i32: 1152 ; AVX512: # %bb.0: # %entry 1153 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1154 ; AVX512-NEXT: retq 1155 entry: 1156 %X = load <16 x i8>, <16 x i8>* %ptr 1157 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1158 %Z = zext <8 x i8> %Y to <8 x i32> 1159 ret <8 x i32> %Z 1160 } 1161 1162 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) { 1163 ; SSE2-LABEL: load_zext_8i8_to_8i64: 1164 ; SSE2: # %bb.0: # %entry 1165 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1166 ; SSE2-NEXT: pxor %xmm4, %xmm4 1167 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 1168 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 1169 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1170 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1171 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 1172 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1173 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1174 
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] 1175 ; SSE2-NEXT: movdqa %xmm3, %xmm2 1176 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1177 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 1178 ; SSE2-NEXT: retq 1179 ; 1180 ; SSSE3-LABEL: load_zext_8i8_to_8i64: 1181 ; SSSE3: # %bb.0: # %entry 1182 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1183 ; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128] 1184 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1185 ; SSSE3-NEXT: pshufb %xmm4, %xmm0 1186 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128] 1187 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 1188 ; SSSE3-NEXT: pshufb %xmm5, %xmm1 1189 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 1190 ; SSSE3-NEXT: pshufb %xmm4, %xmm2 1191 ; SSSE3-NEXT: pshufb %xmm5, %xmm3 1192 ; SSSE3-NEXT: retq 1193 ; 1194 ; SSE41-LABEL: load_zext_8i8_to_8i64: 1195 ; SSE41: # %bb.0: # %entry 1196 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1197 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1198 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1199 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1200 ; SSE41-NEXT: retq 1201 ; 1202 ; AVX1-LABEL: load_zext_8i8_to_8i64: 1203 ; AVX1: # %bb.0: # %entry 1204 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1205 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1206 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1207 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1208 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1209 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1210 ; AVX1-NEXT: retq 1211 ; 1212 ; AVX2-LABEL: load_zext_8i8_to_8i64: 1213 ; AVX2: # %bb.0: # %entry 1214 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1215 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1216 ; AVX2-NEXT: retq 1217 ; 1218 ; AVX512-LABEL: load_zext_8i8_to_8i64: 1219 ; AVX512: # %bb.0: # %entry 1220 ; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 1221 ; AVX512-NEXT: retq 1222 entry: 1223 %X = load <8 x i8>, <8 x i8>* %ptr 1224 %Y = zext <8 x i8> %X to <8 x i64> 1225 ret <8 x i64> %Y 1226 } 1227 1228 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) { 1229 ; SSE2-LABEL: load_zext_16i8_to_16i16: 1230 ; SSE2: # %bb.0: # %entry 1231 ; SSE2-NEXT: movdqa (%rdi), %xmm1 1232 ; SSE2-NEXT: pxor %xmm2, %xmm2 1233 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1234 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1235 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1236 ; SSE2-NEXT: retq 1237 ; 1238 ; SSSE3-LABEL: load_zext_16i8_to_16i16: 1239 ; SSSE3: # %bb.0: # %entry 1240 ; SSSE3-NEXT: movdqa (%rdi), %xmm1 1241 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1242 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1243 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1244 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1245 ; SSSE3-NEXT: retq 1246 ; 1247 ; SSE41-LABEL: load_zext_16i8_to_16i16: 1248 ; SSE41: # %bb.0: # %entry 1249 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1250 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1251 ; SSE41-NEXT: retq 1252 ; 1253 ; AVX1-LABEL: load_zext_16i8_to_16i16: 1254 ; AVX1: # %bb.0: # %entry 1255 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1256 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1257 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1258 ; AVX1-NEXT: retq 1259 ; 1260 ; AVX2-LABEL: load_zext_16i8_to_16i16: 1261 ; AVX2: # %bb.0: # %entry 1262 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1263 ; AVX2-NEXT: retq 1264 ; 1265 ; AVX512-LABEL: load_zext_16i8_to_16i16: 1266 ; AVX512: # %bb.0: # 
%entry 1267 ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1268 ; AVX512-NEXT: retq 1269 entry: 1270 %X = load <16 x i8>, <16 x i8>* %ptr 1271 %Y = zext <16 x i8> %X to <16 x i16> 1272 ret <16 x i16> %Y 1273 } 1274 1275 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) { 1276 ; SSE2-LABEL: load_zext_2i16_to_2i64: 1277 ; SSE2: # %bb.0: # %entry 1278 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1279 ; SSE2-NEXT: pxor %xmm1, %xmm1 1280 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1281 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1282 ; SSE2-NEXT: retq 1283 ; 1284 ; SSSE3-LABEL: load_zext_2i16_to_2i64: 1285 ; SSSE3: # %bb.0: # %entry 1286 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1287 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1288 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1289 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1290 ; SSSE3-NEXT: retq 1291 ; 1292 ; SSE41-LABEL: load_zext_2i16_to_2i64: 1293 ; SSE41: # %bb.0: # %entry 1294 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1295 ; SSE41-NEXT: retq 1296 ; 1297 ; AVX-LABEL: load_zext_2i16_to_2i64: 1298 ; AVX: # %bb.0: # %entry 1299 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1300 ; AVX-NEXT: retq 1301 entry: 1302 %X = load <2 x i16>, <2 x i16>* %ptr 1303 %Y = zext <2 x i16> %X to <2 x i64> 1304 ret <2 x i64> %Y 1305 } 1306 1307 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) { 1308 ; SSE2-LABEL: load_zext_4i16_to_4i32: 1309 ; SSE2: # %bb.0: # %entry 1310 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1311 ; SSE2-NEXT: pxor %xmm1, %xmm1 1312 ; SSE2-NEXT: 
punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1313 ; SSE2-NEXT: retq 1314 ; 1315 ; SSSE3-LABEL: load_zext_4i16_to_4i32: 1316 ; SSSE3: # %bb.0: # %entry 1317 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1318 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1319 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1320 ; SSSE3-NEXT: retq 1321 ; 1322 ; SSE41-LABEL: load_zext_4i16_to_4i32: 1323 ; SSE41: # %bb.0: # %entry 1324 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1325 ; SSE41-NEXT: retq 1326 ; 1327 ; AVX-LABEL: load_zext_4i16_to_4i32: 1328 ; AVX: # %bb.0: # %entry 1329 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1330 ; AVX-NEXT: retq 1331 entry: 1332 %X = load <4 x i16>, <4 x i16>* %ptr 1333 %Y = zext <4 x i16> %X to <4 x i32> 1334 ret <4 x i32> %Y 1335 } 1336 1337 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) { 1338 ; SSE2-LABEL: load_zext_4i16_to_4i64: 1339 ; SSE2: # %bb.0: # %entry 1340 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1341 ; SSE2-NEXT: pxor %xmm2, %xmm2 1342 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1343 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1344 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1345 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1346 ; SSE2-NEXT: retq 1347 ; 1348 ; SSSE3-LABEL: load_zext_4i16_to_4i64: 1349 ; SSSE3: # %bb.0: # %entry 1350 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1351 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1352 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1353 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1354 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1355 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1356 ; SSSE3-NEXT: retq 1357 ; 1358 ; 
SSE41-LABEL: load_zext_4i16_to_4i64: 1359 ; SSE41: # %bb.0: # %entry 1360 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1361 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1362 ; SSE41-NEXT: retq 1363 ; 1364 ; AVX1-LABEL: load_zext_4i16_to_4i64: 1365 ; AVX1: # %bb.0: # %entry 1366 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1367 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1368 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1369 ; AVX1-NEXT: retq 1370 ; 1371 ; AVX2-LABEL: load_zext_4i16_to_4i64: 1372 ; AVX2: # %bb.0: # %entry 1373 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1374 ; AVX2-NEXT: retq 1375 ; 1376 ; AVX512-LABEL: load_zext_4i16_to_4i64: 1377 ; AVX512: # %bb.0: # %entry 1378 ; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1379 ; AVX512-NEXT: retq 1380 entry: 1381 %X = load <4 x i16>, <4 x i16>* %ptr 1382 %Y = zext <4 x i16> %X to <4 x i64> 1383 ret <4 x i64> %Y 1384 } 1385 1386 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) { 1387 ; SSE2-LABEL: load_zext_8i16_to_8i32: 1388 ; SSE2: # %bb.0: # %entry 1389 ; SSE2-NEXT: movdqa (%rdi), %xmm1 1390 ; SSE2-NEXT: pxor %xmm2, %xmm2 1391 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1392 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1393 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1394 ; SSE2-NEXT: retq 1395 ; 1396 ; SSSE3-LABEL: load_zext_8i16_to_8i32: 1397 ; SSSE3: # %bb.0: # %entry 1398 ; SSSE3-NEXT: movdqa (%rdi), %xmm1 1399 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1400 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1401 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1402 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1403 ; SSSE3-NEXT: retq 1404 ; 1405 ; SSE41-LABEL: load_zext_8i16_to_8i32: 1406 ; SSE41: # %bb.0: # %entry 1407 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1408 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1409 ; SSE41-NEXT: retq 1410 ; 1411 ; AVX1-LABEL: load_zext_8i16_to_8i32: 1412 ; AVX1: # %bb.0: # %entry 1413 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1414 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1415 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1416 ; AVX1-NEXT: retq 1417 ; 1418 ; AVX2-LABEL: load_zext_8i16_to_8i32: 1419 ; AVX2: # %bb.0: # %entry 1420 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1421 ; AVX2-NEXT: retq 1422 ; 1423 ; AVX512-LABEL: load_zext_8i16_to_8i32: 1424 ; AVX512: # %bb.0: # %entry 1425 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1426 ; AVX512-NEXT: retq 1427 entry: 1428 %X = load <8 x i16>, <8 x i16>* %ptr 1429 %Y = zext <8 x i16> %X to <8 x i32> 1430 ret <8 x i32> %Y 1431 } 1432 1433 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) { 1434 ; SSE2-LABEL: load_zext_2i32_to_2i64: 1435 ; SSE2: # %bb.0: # %entry 1436 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1437 ; SSE2-NEXT: xorps %xmm1, %xmm1 1438 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1439 ; SSE2-NEXT: retq 1440 ; 1441 ; SSSE3-LABEL: load_zext_2i32_to_2i64: 1442 ; SSSE3: # %bb.0: # %entry 1443 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1444 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1445 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1446 ; SSSE3-NEXT: retq 1447 ; 1448 ; SSE41-LABEL: load_zext_2i32_to_2i64: 1449 ; SSE41: # %bb.0: # %entry 1450 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1451 ; SSE41-NEXT: retq 1452 ; 1453 ; AVX-LABEL: load_zext_2i32_to_2i64: 1454 ; AVX: # %bb.0: # %entry 1455 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1456 ; AVX-NEXT: retq 1457 entry: 1458 %X = load <2 x i32>, <2 x i32>* %ptr 1459 %Y = zext <2 x i32> %X to <2 x i64> 1460 ret <2 x i64> %Y 1461 } 1462 1463 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) { 1464 ; SSE2-LABEL: load_zext_4i32_to_4i64: 1465 ; SSE2: # %bb.0: # %entry 1466 ; SSE2-NEXT: movaps (%rdi), %xmm1 1467 ; SSE2-NEXT: xorps %xmm2, %xmm2 1468 ; SSE2-NEXT: movaps %xmm1, %xmm0 1469 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1470 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1471 ; SSE2-NEXT: retq 1472 ; 1473 ; SSSE3-LABEL: load_zext_4i32_to_4i64: 1474 ; SSSE3: # %bb.0: # %entry 1475 ; SSSE3-NEXT: movaps (%rdi), %xmm1 1476 ; SSSE3-NEXT: xorps %xmm2, %xmm2 1477 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1478 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1479 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1480 ; SSSE3-NEXT: retq 1481 ; 1482 ; SSE41-LABEL: load_zext_4i32_to_4i64: 1483 ; SSE41: # %bb.0: # %entry 1484 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1485 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1486 ; SSE41-NEXT: retq 1487 ; 1488 ; AVX1-LABEL: load_zext_4i32_to_4i64: 1489 ; AVX1: # %bb.0: # %entry 1490 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1491 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1492 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1493 ; AVX1-NEXT: retq 1494 ; 1495 ; AVX2-LABEL: load_zext_4i32_to_4i64: 1496 ; AVX2: # %bb.0: # %entry 1497 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1498 ; AVX2-NEXT: retq 1499 ; 1500 ; AVX512-LABEL: load_zext_4i32_to_4i64: 1501 ; AVX512: # %bb.0: # %entry 1502 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1503 ; AVX512-NEXT: retq 1504 entry: 1505 %X = load <4 x i32>, <4 x i32>* %ptr 1506 %Y = zext <4 x i32> %X to <4 x i64> 1507 ret <4 x i64> %Y 1508 } 1509 1510 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { 1511 ; SSE2-LABEL: zext_8i8_to_8i32: 1512 ; SSE2: # %bb.0: # %entry 1513 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1514 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1515 ; SSE2-NEXT: pxor %xmm2, %xmm2 1516 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1517 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1518 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1519 ; SSE2-NEXT: retq 1520 ; 1521 ; SSSE3-LABEL: zext_8i8_to_8i32: 1522 ; SSSE3: # %bb.0: # %entry 1523 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 1524 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 1525 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1526 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1527 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1528 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1529 ; SSSE3-NEXT: retq 1530 ; 1531 ; SSE41-LABEL: zext_8i8_to_8i32: 1532 ; SSE41: # %bb.0: # %entry 1533 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0 1534 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1535 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1536 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1537 ; SSE41-NEXT: movdqa %xmm2, %xmm0 1538 ; SSE41-NEXT: retq 1539 ; 1540 ; AVX1-LABEL: zext_8i8_to_8i32: 1541 ; AVX1: # %bb.0: # %entry 1542 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1543 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 
= xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1544 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1545 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1546 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1547 ; AVX1-NEXT: retq 1548 ; 1549 ; AVX2-LABEL: zext_8i8_to_8i32: 1550 ; AVX2: # %bb.0: # %entry 1551 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1552 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1553 ; AVX2-NEXT: retq 1554 ; 1555 ; AVX512-LABEL: zext_8i8_to_8i32: 1556 ; AVX512: # %bb.0: # %entry 1557 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1558 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1559 ; AVX512-NEXT: retq 1560 entry: 1561 %t = zext <8 x i8> %z to <8 x i32> 1562 ret <8 x i32> %t 1563 } 1564 1565 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 1566 ; SSE2-LABEL: shuf_zext_8i16_to_8i32: 1567 ; SSE2: # %bb.0: # %entry 1568 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1569 ; SSE2-NEXT: pxor %xmm2, %xmm2 1570 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1571 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1572 ; SSE2-NEXT: retq 1573 ; 1574 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32: 1575 ; SSSE3: # %bb.0: # %entry 1576 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 1577 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1578 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1579 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1580 ; SSSE3-NEXT: retq 1581 ; 1582 ; SSE41-LABEL: shuf_zext_8i16_to_8i32: 1583 ; SSE41: # %bb.0: # %entry 1584 ; SSE41-NEXT: movdqa %xmm0, %xmm1 1585 ; SSE41-NEXT: pxor 
%xmm2, %xmm2 1586 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1587 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1588 ; SSE41-NEXT: retq 1589 ; 1590 ; AVX1-LABEL: shuf_zext_8i16_to_8i32: 1591 ; AVX1: # %bb.0: # %entry 1592 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1593 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1594 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1595 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1596 ; AVX1-NEXT: retq 1597 ; 1598 ; AVX2-LABEL: shuf_zext_8i16_to_8i32: 1599 ; AVX2: # %bb.0: # %entry 1600 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1601 ; AVX2-NEXT: retq 1602 ; 1603 ; AVX512-LABEL: shuf_zext_8i16_to_8i32: 1604 ; AVX512: # %bb.0: # %entry 1605 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1606 ; AVX512-NEXT: retq 1607 entry: 1608 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8> 1609 %Z = bitcast <16 x i16> %B to <8 x i32> 1610 ret <8 x i32> %Z 1611 } 1612 1613 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1614 ; SSE2-LABEL: shuf_zext_4i32_to_4i64: 1615 ; SSE2: # %bb.0: # %entry 1616 ; SSE2-NEXT: movaps %xmm0, %xmm1 1617 ; SSE2-NEXT: xorps %xmm2, %xmm2 1618 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1619 ; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1620 ; SSE2-NEXT: retq 1621 ; 1622 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64: 1623 ; SSSE3: # %bb.0: # %entry 1624 ; SSSE3-NEXT: movaps %xmm0, %xmm1 1625 ; SSSE3-NEXT: 
xorps %xmm2, %xmm2 1626 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1627 ; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1628 ; SSSE3-NEXT: retq 1629 ; 1630 ; SSE41-LABEL: shuf_zext_4i32_to_4i64: 1631 ; SSE41: # %bb.0: # %entry 1632 ; SSE41-NEXT: movdqa %xmm0, %xmm1 1633 ; SSE41-NEXT: pxor %xmm2, %xmm2 1634 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1635 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1636 ; SSE41-NEXT: retq 1637 ; 1638 ; AVX1-LABEL: shuf_zext_4i32_to_4i64: 1639 ; AVX1: # %bb.0: # %entry 1640 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1641 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1642 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1643 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1644 ; AVX1-NEXT: retq 1645 ; 1646 ; AVX2-LABEL: shuf_zext_4i32_to_4i64: 1647 ; AVX2: # %bb.0: # %entry 1648 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1649 ; AVX2-NEXT: retq 1650 ; 1651 ; AVX512-LABEL: shuf_zext_4i32_to_4i64: 1652 ; AVX512: # %bb.0: # %entry 1653 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1654 ; AVX512-NEXT: retq 1655 entry: 1656 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4> 1657 %Z = bitcast <8 x i32> %B to <4 x i64> 1658 ret <4 x i64> %Z 1659 } 1660 1661 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) { 1662 ; SSE2-LABEL: shuf_zext_8i8_to_8i32: 1663 ; SSE2: # %bb.0: # %entry 1664 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1665 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1666 ; SSE2-NEXT: packuswb %xmm1, %xmm1 1667 ; SSE2-NEXT: pxor %xmm2, %xmm2 1668 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1669 ; SSE2-NEXT: movdqa %xmm1, 
%xmm0 1670 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1671 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1672 ; SSE2-NEXT: retq 1673 ; 1674 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32: 1675 ; SSSE3: # %bb.0: # %entry 1676 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 1677 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 1678 ; SSSE3-NEXT: pxor %xmm2, %xmm2 1679 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1680 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1681 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1682 ; SSSE3-NEXT: retq 1683 ; 1684 ; SSE41-LABEL: shuf_zext_8i8_to_8i32: 1685 ; SSE41: # %bb.0: # %entry 1686 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1687 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1688 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1689 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1690 ; SSE41-NEXT: movdqa %xmm2, %xmm0 1691 ; SSE41-NEXT: retq 1692 ; 1693 ; AVX1-LABEL: shuf_zext_8i8_to_8i32: 1694 ; AVX1: # %bb.0: # %entry 1695 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1696 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1697 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1698 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1699 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1700 ; AVX1-NEXT: retq 1701 ; 1702 ; AVX2-LABEL: shuf_zext_8i8_to_8i32: 1703 ; AVX2: # %bb.0: # %entry 1704 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1705 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1706 ; AVX2-NEXT: retq 1707 ; 1708 ; AVX512-LABEL: shuf_zext_8i8_to_8i32: 1709 ; AVX512: # %bb.0: # %entry 1710 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1711 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1712 ; AVX512-NEXT: retq 1713 entry: 1714 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8> 1715 %Z = bitcast <32 x i8> %B to <8 x i32> 1716 ret <8 x i32> %Z 1717 } 1718 1719 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp { 1720 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6: 1721 ; SSE2: # %bb.0: # %entry 1722 ; SSE2-NEXT: pxor %xmm1, %xmm1 1723 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1724 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1725 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1726 ; SSE2-NEXT: retq 1727 ; 1728 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6: 1729 ; SSSE3: # %bb.0: # %entry 1730 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1731 ; SSSE3-NEXT: retq 1732 ; 1733 ; SSE41-LABEL: 
shuf_zext_16i8_to_2i64_offset6: 1734 ; SSE41: # %bb.0: # %entry 1735 ; SSE41-NEXT: psrlq $48, %xmm0 1736 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1737 ; SSE41-NEXT: retq 1738 ; 1739 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6: 1740 ; AVX: # %bb.0: # %entry 1741 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0 1742 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1743 ; AVX-NEXT: retq 1744 entry: 1745 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1746 %Z = bitcast <16 x i8> %B to <2 x i64> 1747 ret <2 x i64> %Z 1748 } 1749 1750 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp { 1751 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1752 ; SSE2: # %bb.0: # %entry 1753 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1754 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 1755 ; SSE2-NEXT: pxor %xmm2, %xmm2 1756 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1757 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1758 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1759 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1760 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1761 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1762 ; SSE2-NEXT: retq 1763 ; 1764 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11: 1765 ; SSSE3: # %bb.0: # %entry 1766 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 1767 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero 1768 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero 1769 ; SSSE3-NEXT: retq 1770 ; 1771 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11: 1772 ; SSE41: # %bb.0: # %entry 1773 ; SSE41-NEXT: movdqa %xmm0, %xmm1 1774 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1775 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1776 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1777 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1778 ; SSE41-NEXT: movdqa %xmm2, %xmm0 1779 ; SSE41-NEXT: retq 1780 ; 1781 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11: 1782 ; AVX1: # %bb.0: # %entry 1783 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1784 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1785 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1786 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1787 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1788 ; AVX1-NEXT: retq 1789 ; 1790 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1791 ; AVX2: # %bb.0: # %entry 1792 ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1793 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1794 ; AVX2-NEXT: retq 1795 ; 1796 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11: 1797 ; AVX512: # %bb.0: # %entry 1798 ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1799 ; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1800 ; AVX512-NEXT: retq 1801 entry: 1802 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1803 %Z = bitcast <32 x i8> %B to <4 x i64> 1804 ret <4 x i64> %Z 1805 } 1806 1807 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp { 1808 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6: 1809 ; SSE2: # %bb.0: # %entry 1810 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1811 ; SSE2-NEXT: pxor %xmm1, %xmm1 1812 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1813 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1814 ; SSE2-NEXT: retq 1815 ; 1816 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6: 1817 ; SSSE3: # %bb.0: # %entry 1818 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1819 ; SSSE3-NEXT: retq 1820 ; 1821 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6: 1822 ; SSE41: # %bb.0: # %entry 1823 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = 
xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
  %Z = bitcast <8 x i16> %B to <2 x i64>
  ret <2 x i64> %Z
}

; Shuffle-as-zext starting at a non-zero element. Mask index 8 selects element 0
; of the zeroinitializer operand, so i16 elements 2..5 of %A are each followed
; by three zero i16 lanes; the bitcast then views every group of four as one
; i64, i.e. a zero-extension of elements 2..5 to <4 x i64>.
define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
  %Z = bitcast <16 x i16> %B to <4 x i64>
  ret <4 x i64> %Z
}

; Shuffle-as-zext starting at element 1. The mask interleaves i16 elements 1..4
; of %A with zeros (index 8 is element 0 of the zeroinitializer operand), and
; the bitcast views each pair as one i32: a zero-extension to <4 x i32>.
define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX2-SLOW: # %bb.0: # %entry
; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX2-FAST: # %bb.0: # %entry
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
; AVX512BW-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
  %Z = bitcast <8 x i16> %B to <4 x i32>
  ret <4 x i32> %Z
}

; Shuffle-as-zext starting at element 3 with a partially undefined tail. The
; mask interleaves i16 elements 3..7 of %A with zeros (index 8 is element 0 of
; the zeroinitializer operand); the last three source positions are undef while
; their paired upper halves are still zero. The result is bitcast to <8 x i32>.
define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}

; Shuffle-as-zext of the upper half of a 256-bit input. The mask interleaves
; i16 elements 8..12 and 14 of %A with zeros (index 16 is element 0 of the
; zeroinitializer operand); the positions that would hold elements 13 and 15
; are undef. The result is bitcast to <8 x i32>.
define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}

; Shuffle-as-zext of the upper two i32 elements. The mask interleaves elements
; 2 and 3 of %A with zeros (index 4 is element 0 of the zeroinitializer
; operand), and the bitcast views each pair as one i64.
define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
; SSE: # %bb.0: # %entry
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <4 x i32> %B to <2 x i64>
  ret <2 x i64> %Z
}

; Shuffle-as-zext with undef source lanes at both ends. Only elements 2 and 3
; of %A are named by the mask (for the middle two i64 results); the first and
; last source positions are undef, while every paired upper half is zero
; (index 4 is element 0 of the zeroinitializer operand).
; NOTE(review): the "offset1" in the name presumably refers to the implied
; element sequence 1,2,3,4 with the undefined ends - confirm.
define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 8, i32 3, i32 4, i32 undef, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}

; Plain zext of <32 x i8> to <32 x i32> (a 1024-bit result). In the 128-bit
; vector expectations below the result is written through %rdi in eight
; 16-byte stores and %rdi is returned in %rax; the wider-vector configurations
; return it in vector registers.
define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
; SSE2-LABEL: zext_32i8_to_32i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm3, %xmm8
; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm6, %xmm7
; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, 112(%rdi)
; SSE2-NEXT: movdqa %xmm4, 96(%rdi)
; SSE2-NEXT: movdqa %xmm6, 80(%rdi)
; SSE2-NEXT: movdqa %xmm7, 64(%rdi)
; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
; SSE2-NEXT: movdqa %xmm5, 32(%rdi)
; SSE2-NEXT: movdqa %xmm3, 16(%rdi)
; SSE2-NEXT: movdqa %xmm8, (%rdi)
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_32i8_to_32i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm3, %xmm8
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; SSSE3-NEXT: movdqa %xmm0, %xmm5
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm6
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm6, %xmm7
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, 112(%rdi)
; SSSE3-NEXT: movdqa %xmm4, 96(%rdi)
; SSSE3-NEXT: movdqa %xmm6, 80(%rdi)
; SSSE3-NEXT: movdqa %xmm7, 64(%rdi)
; SSSE3-NEXT: movdqa %xmm0, 48(%rdi)
; SSSE3-NEXT: movdqa %xmm5, 32(%rdi)
; SSSE3-NEXT: movdqa %xmm3, 16(%rdi)
; SSSE3-NEXT: movdqa %xmm8, (%rdi)
; SSSE3-NEXT: movq %rdi, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_32i8_to_32i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm1, 112(%rdi)
; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
; SSE41-NEXT: movdqa %xmm5, 64(%rdi)
; SSE41-NEXT: movdqa %xmm0, 48(%rdi)
; SSE41-NEXT: movdqa %xmm4, 32(%rdi)
; SSE41-NEXT: movdqa %xmm3, 16(%rdi)
; SSE41-NEXT: movdqa %xmm2, (%rdi)
; SSE41-NEXT: movq %rdi, %rax
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_32i8_to_32i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vmovaps %ymm4, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_32i8_to_32i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm3
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vmovdqa %ymm4, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_32i8_to_32i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512-NEXT: retq
  %res = zext <32 x i8>%x to <32 x i32>
  ret <32 x i32> %res
}

; Loads a <2 x i8> from %addr with byte alignment, zero-extends it to
; <2 x i32>, and adds the extended value to itself.
; NOTE(review): the expectations below use 64-bit-lane instructions
; (pmovzxbq / paddq); presumably <2 x i32> is widened to 64-bit lanes during
; type legalization at this revision - confirm.
define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
; SSE2-LABEL: zext_2i8_to_2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: paddq %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_2i8_to_2i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[3],zero,zero,zero
; SSSE3-NEXT: paddq %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_2i8_to_2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: paddq %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_2i8_to_2i32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load <2 x i8>, <2 x i8>* %addr, align 1
  %y = zext <2 x i8> %x to <2 x i32>
  %z = add <2 x i32>%y, %y
  ret <2 x i32>%z
}