; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

; Truncating a subvector of a sign/zero-extended vector back to the source
; element width should fold to a plain subvector extract (or a no-op) where
; the extracted lanes line up with whole source lanes (test1/2/6/7).

define <4 x i32> @test1(<8 x i32> %v) {
; SSE2-LABEL: test1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %x = sext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %t = trunc <4 x i64> %s to <4 x i32>
  ret <4 x i32> %t
}

define <4 x i32> @test2(<8 x i32> %v) {
; SSE2-LABEL: test2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %x = sext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %t = trunc <4 x i64> %s to <4 x i32>
  ret <4 x i32> %t
}

define <2 x i32> @test3(<8 x i32> %v) {
; SSE2-LABEL: test3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: test3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %x = sext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}

define <2 x i32> @test4(<8 x i32> %v) {
; SSE2-LABEL: test4:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: test4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %x = sext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}

define <2 x i32> @test5(<8 x i32> %v) {
; SSE2-LABEL: test5:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: test5:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test5:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %x = sext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 3, i32 4>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}

define <4 x i32> @test6(<8 x i32> %v) {
; SSE2-LABEL: test6:
; SSE2:       # %bb.0:
; SSE2-NEXT:    retq
;
; AVX-LABEL: test6:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %x = zext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %t = trunc <4 x i64> %s to <4 x i32>
  ret <4 x i32> %t
}

define <4 x i32> @test7(<8 x i32> %v) {
; SSE2-LABEL: test7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX-LABEL: test7:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %x = zext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %t = trunc <4 x i64> %s to <4 x i32>
  ret <4 x i32> %t
}

define <2 x i32> @test8(<8 x i32> %v) {
; SSE2-LABEL: test8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: test8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %x = zext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}

define <2 x i32> @test9(<8 x i32> %v) {
; SSE2-LABEL: test9:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: test9:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test9:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %x = zext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}

define <2 x i32> @test10(<8 x i32> %v) {
; SSE2-LABEL: test10:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: test10:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test10:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %x = zext <8 x i32> %v to <8 x i64>
  %s = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 3, i32 4>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}