; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512BW

; Each function below compares two 256-bit vectors element-wise and bitcasts
; the resulting vector of i1 to a scalar integer with one bit per element.
; The expected assembly is checked for every llc invocation listed above.
; The check lines are generated by the script named in the first line of this
; file; regenerate them with that script instead of editing them by hand.

; Signed greater-than compare of two <16 x i16> vectors; the <16 x i1> result
; is bitcast to i16.
define i16 @v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-SSSE3-LABEL: v16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <16 x i16> %a, %b
  %res = bitcast <16 x i1> %x to i16
  ret i16 %res
}

; Signed greater-than compare of two <8 x i32> vectors; the <8 x i1> result
; is bitcast to i8.
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-SSSE3-LABEL: v8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <8 x i32> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Ordered greater-than compare of two <8 x float> vectors; the <8 x i1> result
; is bitcast to i8.
define i8 @v8f32(<8 x float> %a, <8 x float> %b) {
; SSE2-SSSE3-LABEL: v8f32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = fcmp ogt <8 x float> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Signed greater-than compare of two <32 x i8> vectors; the <32 x i1> result
; is bitcast to i32.
define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE2-SSSE3-LABEL: v32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpmovmskb %xmm2, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <32 x i8> %a, %b
  %res = bitcast <32 x i1> %x to i32
  ret i32 %res
}

; Signed greater-than compare of two <4 x i64> vectors; the <4 x i1> result
; is bitcast to i4.
define i4 @v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-SSSE3-LABEL: v4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <4 x i64> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Ordered greater-than compare of two <4 x double> vectors; the <4 x i1>
; result is bitcast to i4.
define i4 @v4f64(<4 x double> %a, <4 x double> %b) {
; SSE2-SSSE3-LABEL: v4f64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = fcmp ogt <4 x double> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}