; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
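
; Each test bitcasts an integer to a <N x i1> mask and zero extends it to a
; wider element type. Targets without mask registers broadcast the scalar to
; every lane, AND with a constant holding one power-of-two bit per lane,
; compare for equality against that same constant, then shift the all-ones
; result right so each lane ends up as 0 or 1. Targets with avx512 move the
; bitmask into a k-register and materialize the extension from it directly.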

;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

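; The byte-sized cases need extra care: SSE/AVX have no 8-bit element shifts,
; so the compare result is shifted with a 16-bit psrlw and then masked with a
; splat of 1 to clear the bits dragged in from the neighbouring byte. The
; 9241421688590303745 splat is 0x8040201008040201, i.e. the
; [1,2,4,8,16,32,64,128] byte mask broadcast as a single quadword.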
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;
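; SSE targets return these in two xmm registers. AVX1 has no 256-bit integer
; compare, so each 128-bit half is compared against zero, inverted by XORing
; with all-ones, shifted, and stitched back together with vinsertf128.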

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

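; Once the mask has more than 16 bits, targets without avx512bw must split it:
; the avx512f lowering below moves the low and high 16 bits into separate
; k-registers and expands each half on its own.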
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;
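; 512-bit results come back in two ymm registers (AVX) or four xmm registers
; (SSE), so everything below the avx512 runs is the same pattern applied once
; per 128-bit or 256-bit chunk.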

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

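; Without avx512bw, mask registers are only 16 bits wide, so the i64 mask is
; shifted apart into four k-registers and each 16-byte quarter of the result
; is built independently; with avx512bw a single kmovq covers all 64 bits.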
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-FAST-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm4 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    movq %rdi, %rcx
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    movl %edi, %edx
; AVX512F-NEXT:    shrl $16, %edx
; AVX512F-NEXT:    shrq $32, %rax
; AVX512F-NEXT:    shrq $48, %rcx
; AVX512F-NEXT:    kmovw %ecx, %k2
; AVX512F-NEXT:    kmovw %eax, %k3
; AVX512F-NEXT:    kmovw %edx, %k4
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k4} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k3} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm2 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovq %rdi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = zext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}