; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512

; Each test bitcasts an iN scalar mask to an <N x i1> vector and sign-extends
; it to a full-width element vector, checking the lowering per subtarget.

;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}