; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Each function below ANDs two 512-bit vector compares and bitcasts the
; resulting <N x i1> vector to an iN scalar mask.  The assertions pin how each
; subtarget materializes the mask (movmsk-style sequences for pre-AVX-512,
; k-register compares with a mask-predicated second compare for AVX-512).

; <8 x i64> signed-greater-than compares; <8 x i1> mask returned as i8.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm11[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm9[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; <8 x double> ordered-greater-than compares; <8 x i1> mask returned as i8.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm7[0,1,0,2,4,5,6,7]
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm6[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    shufps {{.*#+}} xmm11 = xmm11[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm11[0,1,0,2,4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm10[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm8[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; <32 x i16> signed-greater-than compares; <32 x i1> mask returned as i32.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packsswb %xmm11, %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packsswb %xmm9, %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pmovmskb %xmm10, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

; <16 x i32> signed-greater-than compares; <16 x i1> mask returned as i16.
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm7 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE-NEXT:    pshufb %xmm7, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE-NEXT:    pshufb %xmm3, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufb %xmm7, %xmm11
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufb %xmm7, %xmm9
; SSE-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm11[0],xmm9[1],xmm11[1]
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufb %xmm3, %xmm10
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufb %xmm3, %xmm8
; SSE-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm10[0],xmm8[1],xmm10[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm9[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; <16 x float> ordered-greater-than compares; <16 x i1> mask returned as i16.
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm3, %xmm7
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    pshufb %xmm3, %xmm6
; SSE-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    pshufb %xmm1, %xmm4
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufb %xmm3, %xmm11
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufb %xmm3, %xmm9
; SSE-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm11[0],xmm9[1],xmm11[1]
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufb %xmm1, %xmm10
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufb %xmm1, %xmm8
; SSE-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm10[0],xmm8[1],xmm10[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm9[4,5,6,7]
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v16f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; <64 x i8> signed-greater-than compares; <64 x i1> mask returned as i64.
define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
; SSE-LABEL: v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pand %xmm3, %xmm9
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pand %xmm1, %xmm11
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pmovmskb %xmm10, %eax
; SSE-NEXT:    pmovmskb %xmm11, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %edx
; SSE-NEXT:    pmovmskb %xmm9, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %edx, %eax
; SSE-NEXT:    shlq $32, %rax
; SSE-NEXT:    orq %rcx, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtb %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm9
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm8, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm7, %xmm5, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm9, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm7
; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm7, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm3, %edx
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k2
; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k3
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k4
; AVX512F-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0 {%k4}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0 {%k3}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    shll $16, %ecx
; AVX512F-NEXT:    orl %eax, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %edx
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %edx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <64 x i8> %a, %b
  %x1 = icmp sgt <64 x i8> %c, %d
  %y = and <64 x i1> %x0, %x1
  %res = bitcast <64 x i1> %y to i64
  ret i64 %res
}