; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512BW

; Overview
; --------
; Every function in this file compares two vectors of at most 128 bits lane
; by lane and bitcasts the resulting <N x i1> mask to a scalar integer. The
; invocations above compile the file for x86-64 with six feature sets (+sse2,
; +ssse3, +avx, +avx2, +avx512f,+avx512vl, and the last one plus +avx512bw)
; and match the emitted assembly against the autogenerated assertions.
;
; The assertion lines inside each function are generated output; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
;
; NOTE(review): in the portion of the file visible here, no assertion uses the
; bare SSE2, SSSE3 or AVX512 prefixes; only the combined and the F/BW specific
; ones appear. FileCheck can reject prefixes that are never used unless
; --allow-unused-prefixes is in effect. Confirm how this directory is
; configured before trimming them.

; Signed greater-than on <8 x i16>; the 8-lane mask is returned as an i8.
define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-SSSE3-LABEL: v8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v8i16:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <8 x i16> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Signed greater-than on <4 x i32>; the 4-lane mask is returned as an i4.
define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-SSSE3-LABEL: v4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v4i32:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <4 x i32> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Ordered greater-than (fcmp ogt) on <4 x float>; the mask is returned as an i4.
define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
; SSE2-SSSE3-LABEL: v4f32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v4f32:
; AVX12: # %bb.0:
; AVX12-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltps %xmm0, %xmm1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltps %xmm0, %xmm1, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = fcmp ogt <4 x float> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Signed greater-than on <16 x i8>; the 16-lane mask is returned as an i16.
define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v16i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def $ax killed $ax killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <16 x i8> %a, %b
  %res = bitcast <16 x i1> %x to i16
  ret i16 %res
}

; Signed greater-than on <2 x i8>, a vector type much narrower than 128 bits;
; the 2-lane mask is returned as an i2.
define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
; SSE2-SSSE3-LABEL: v2i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: psllq $56, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT: psrad $24, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: psllq $56, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT: psrad $24, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v2i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $56, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: v2i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $56, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX2-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskpd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $56, %xmm1, %xmm1
; AVX512F-NEXT: vpsraq $56, %xmm1, %xmm1
; AVX512F-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX512F-NEXT: vpsraq $56, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $56, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraq $56, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraq $56, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <2 x i8> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than on <2 x i16>; the 2-lane mask is returned as an i2.
define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
; SSE2-SSSE3-LABEL: v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: psllq $48, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: psllq $48, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-SSSE3-NEXT: psrad $16, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: v2i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX2-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpsllq $48, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskpd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX512F-NEXT: vpsraq $48, %xmm1, %xmm1
; AVX512F-NEXT: vpsllq $48, %xmm0, %xmm0
; AVX512F-NEXT: vpsraq $48, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $48, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraq $48, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllq $48, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraq $48, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <2 x i16> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than on <2 x i32>; the 2-lane mask is returned as an i2.
define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
; SSE2-SSSE3-LABEL: v2i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: psllq $32, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT: psrad $31, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-SSSE3-NEXT: psllq $32, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT: psrad $31, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskpd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $32, %xmm1, %xmm1
; AVX512F-NEXT: vpsraq $32, %xmm1, %xmm1
; AVX512F-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT: vpsraq $32, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $32, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraq $32, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraq $32, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <2 x i32> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than on <2 x i64>; the 2-lane mask is returned as an i2.
define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-SSSE3-LABEL: v2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v2i64:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <2 x i64> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Ordered greater-than (fcmp ogt) on <2 x double>; the mask is returned as an i2.
define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
; SSE2-SSSE3-LABEL: v2f64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v2f64:
; AVX12: # %bb.0:
; AVX12-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2f64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = fcmp ogt <2 x double> %a, %b
  %res = bitcast <2 x i1> %x to i2
  ret i2 %res
}

; Signed greater-than on <4 x i8>; the 4-lane mask is returned as an i4.
define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
; SSE2-SSSE3-LABEL: v4i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pslld $24, %xmm1
; SSE2-SSSE3-NEXT: psrad $24, %xmm1
; SSE2-SSSE3-NEXT: pslld $24, %xmm0
; SSE2-SSSE3-NEXT: psrad $24, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v4i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpslld $24, %xmm1, %xmm1
; AVX12-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX12-NEXT: vpslld $24, %xmm0, %xmm0
; AVX12-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $24, %xmm1, %xmm1
; AVX512F-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX512F-NEXT: vpslld $24, %xmm0, %xmm0
; AVX512F-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslld $24, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX512BW-NEXT: vpslld $24, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <4 x i8> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Signed greater-than on <4 x i16>; the 4-lane mask is returned as an i4.
define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
; SSE2-SSSE3-LABEL: v4i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pslld $16, %xmm1
; SSE2-SSSE3-NEXT: psrad $16, %xmm1
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v4i16:
; AVX12: # %bb.0:
; AVX12-NEXT: vpslld $16, %xmm1, %xmm1
; AVX12-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX12-NEXT: vpslld $16, %xmm0, %xmm0
; AVX12-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $16, %xmm1, %xmm1
; AVX512F-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512F-NEXT: vpslld $16, %xmm0, %xmm0
; AVX512F-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslld $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpslld $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <4 x i16> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

; Signed greater-than on <8 x i8>; the 8-lane mask is returned as an i8.
define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
; SSE2-SSSE3-LABEL: v8i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: psllw $8, %xmm1
; SSE2-SSSE3-NEXT: psraw $8, %xmm1
; SSE2-SSSE3-NEXT: psllw $8, %xmm0
; SSE2-SSSE3-NEXT: psraw $8, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v8i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpsllw $8, %xmm1, %xmm1
; AVX12-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX12-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $8, %xmm1, %xmm1
; AVX512F-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
  %x = icmp sgt <8 x i8> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Equality compare on <16 x i8>, then a shuffle widens the 16-lane mask to 64
; lanes before the bitcast to i64. Shuffle indices 0-15 select the compare
; result; result lanes 16-63 all use indices 16-31, which select elements of
; the second shuffle operand (zeroinitializer here).
define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8_widened_with_zeroes:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v16i8_widened_with_zeroes:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i8_widened_with_zeroes:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i8_widened_with_zeroes:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8_widened_with_zeroes:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
entry:
  %c = icmp eq <16 x i8> %a, %b
  %d = shufflevector <16 x i1> %c, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %e = bitcast <64 x i1> %d to i64
  ret i64 %e
}

; Same shape as the zero-widened test above, but the second shuffle operand is
; a <16 x i1> constant whose elements are all i1 -1, so result lanes 16-63 are
; filled from that all-ones vector instead of from zeros.
define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8_widened_with_ones:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
; SSE2-SSSE3-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-SSSE3-NEXT: orq %rcx, %rax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v16i8_widened_with_ones:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
; AVX1-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX1-NEXT: orq %rcx, %rax
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i8_widened_with_ones:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX2-NEXT: orq %rcx, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i8_widened_with_ones:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %ecx
; AVX512F-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
; AVX512F-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8_widened_with_ones:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0
; AVX512BW-NEXT: kxnord %k0, %k0, %k1
; AVX512BW-NEXT: kunpckdq %k0, %k1, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
entry:
  %c = icmp eq <16 x i8> %a, %b
  %d = shufflevector <16 x i1> %c, <16 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %e = bitcast <64 x i1> %d to i64
  ret i64 %e
}