; Tests x86-64 codegen for vector truncation (trunc + shufflevector / bitcast)
; across SSE2, SSSE3, SSE4.1 and AVX feature levels.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1

; Truncate two <2 x i64> to <2 x i32> and concatenate into <4 x i32>.
define <4 x i32> @trunc2x2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX-NEXT: retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

; Truncate <2 x i64> to <2 x i32>, returned as an i64 bitcast.
define i64 @trunc2i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

; Truncate two <4 x i32> to <4 x i16> and concatenate into <8 x i16>.
define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

; Truncate two <8 x i16> to <8 x i8> and concatenate into <16 x i8>.
define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

; Constant-folded truncate of zeroinitializer should collapse to a zeroed
; register. Note: the LABEL checks below previously lacked the trailing ':'
; used by every other function in this file; the colon makes the match an
; exact label rather than a prefix match.
define <16 x i8> @trunc16i64_const() {
; SSE-LABEL: trunc16i64_const:
; SSE: # BB#0: # %entry
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc16i64_const:
; AVX: # BB#0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}