; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Every function below follows the same shape: compare two pairs of 256-bit
; vectors, AND the two <N x i1> compare results together, and bitcast the
; combined mask to an iN scalar that is returned. The assertions record the
; x86-64 code expected for that pattern at each feature level exercised by the
; llc invocations above (sse2, ssse3, avx, avx2, avx512f+avx512vl, and
; avx512f+avx512vl+avx512bw).

; Signed greater-than (icmp sgt) on two pairs of <4 x i64>; the ANDed
; <4 x i1> result is returned as an i4 bitmask.
; The sse2/ssse3 expectation has no 64-bit compare: it XORs the operands with
; the constant [2147483648,0,2147483648,0] and combines 32-bit pcmpgtd/pcmpeqd
; results. The avx-512 expectations compare directly into mask registers,
; with the first compare's mask used as the write-mask of the second.
define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; SSE2-SSSE3-LABEL: v4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm9
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm9
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm7
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm5
; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm6
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm4
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    andps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskps %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskps %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512F-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <4 x i64> %a, %b
  %x1 = icmp sgt <4 x i64> %c, %d
  %y = and <4 x i1> %x0, %x1
  %res = bitcast <4 x i1> %y to i4
  ret i4 %res
}

; Ordered greater-than (fcmp ogt) on two pairs of <4 x double>; the ANDed
; <4 x i1> result is returned as an i4 bitmask.
; At every feature level the expected code uses the less-than compare
; mnemonics (cmpltpd / vcmpltpd) rather than a greater-than form.
define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; SSE2-SSSE3-LABEL: v4f64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    cmpltpd %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    cmpltpd %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE2-SSSE3-NEXT:    andps %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    movmskps %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512F-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512BW-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <4 x double> %a, %b
  %x1 = fcmp ogt <4 x double> %c, %d
  %y = and <4 x i1> %x0, %x1
  %res = bitcast <4 x i1> %y to i4
  ret i4 %res
}

; Signed greater-than (icmp sgt) on two pairs of <16 x i16>; the ANDed
; <16 x i1> result is returned as an i16 bitmask.
; With avx512f+avx512vl only, the expected code compares into a vector
; register, widens with vpmovsxwd and forms the mask with vptestmd; once
; avx512bw is added the compares write mask registers directly.
define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
; SSE2-SSSE3-LABEL: v16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    packsswb %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <16 x i16> %a, %b
  %x1 = icmp sgt <16 x i16> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; Signed greater-than (icmp sgt) on two pairs of <8 x i32>; the ANDed
; <8 x i1> result is returned as an i8 bitmask.
; Below avx-512 the expected code narrows the compare results with
; packssdw and then packsswb before extracting the mask with pmovmskb.
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; SSE2-SSSE3-LABEL: v8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512F-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <8 x i32> %a, %b
  %x1 = icmp sgt <8 x i32> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Ordered greater-than (fcmp ogt) on two pairs of <8 x float>; the ANDed
; <8 x i1> result is returned as an i8 bitmask.
; At every feature level the expected code uses the less-than compare
; mnemonics (cmpltps / vcmpltps) rather than a greater-than form.
define i8 @v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
; SSE2-SSSE3-LABEL: v8f32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    cmpltps %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    cmpltps %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm6
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm6
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512F-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512BW-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <8 x float> %a, %b
  %x1 = fcmp ogt <8 x float> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Signed greater-than (icmp sgt) on two pairs of <32 x i8>; the ANDed
; <32 x i1> result is returned as an i32 bitmask.
; For sse2/ssse3, avx and avx512f+avx512vl the expected code produces two
; 16-bit halves and merges them with shll $16 / orl; avx2 uses a single
; 256-bit vpmovmskb and avx512bw a single masked compare plus kmovd.
define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; SSE2-SSSE3-LABEL: v32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <32 x i8> %a, %b
  %x1 = icmp sgt <32 x i8> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}