; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,X86-AVX512VLCDBW

; Every function below follows the same shape: compare two vectors for
; equality, bitcast the resulting <N x i1> mask to an integer, zero-extend it,
; and splat that scalar into every lane of the returned vector. The assertions
; record how each of the three llc configurations above lowers that
; mask-to-vector broadcast (a single vpbroadcastm* instruction in some
; configurations, a longer scalar/insert/shuffle sequence in others).

; 8-bit mask from an <8 x i16> compare, splatted into <2 x i64>.
define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
; AVX512CD-LABEL: test_mm_epi64:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vzeroupper
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm_epi64:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %xmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <8 x i16> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
  %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %vecinit1.i.i
}

; 16-bit mask from a <16 x i8> compare, splatted into <4 x i32>.
define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
; AVX512CD-LABEL: test_mm_epi32:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512CD-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vzeroupper
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm_epi32:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <16 x i8> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
  %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %vecinit3.i.i
}

; 16-bit mask from a <16 x i32> compare, splatted into <16 x i32>.
define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
; AVX512CD-LABEL: test_mm512_epi32:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT:    vpbroadcastmw2d %k0, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm512_epi32:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <16 x i32> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
  %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %vecinit15.i.i
}

; 8-bit mask from an <8 x i32> compare, splatted into <8 x i64>.
define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
; AVX512CD-LABEL: test_mm512_epi64:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm512_epi64:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastq %xmm0, %zmm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <8 x i32> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
  %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %vecinit7.i.i
}

; 8-bit mask from an <8 x i32> compare, splatted into <4 x i64>.
define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
; AVX512CD-LABEL: test_mm256_epi64:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    movzbl %al, %eax
; AVX512CD-NEXT:    vmovq %rax, %xmm0
; AVX512CD-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm256_epi64:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %ymm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastq %xmm0, %ymm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <8 x i32> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
  %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %vecinit3.i.i
}

; 16-bit mask from a <16 x i16> compare, splatted into <8 x i32>.
define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
; AVX512CD-LABEL: test_mm256_epi32:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm256_epi32:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <16 x i16> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %vecinit7.i.i
}