Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
      8 
      9 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
        ; Test: lane-wise signed greater-than (%a > %b and %c > %d) on <4 x i64>,
        ; AND of the two <4 x i1> results, then a bitcast of the combined mask to
        ; the i4 return value. The lines below give the expected llc output for
        ; each check prefix used by this function.
     10 ; SSE2-SSSE3-LABEL: v4i64:
     11 ; SSE2-SSSE3:       # %bb.0:
     12 ; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
     13 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm3
     14 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm1
     15 ; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm9
     16 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm9
     17 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
     18 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
     19 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
     20 ; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm1
     21 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
     22 ; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
     23 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm2
     24 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm0
     25 ; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm1
     26 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm1
     27 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm1[0,0,2,2]
     28 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
     29 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
     30 ; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm2
     31 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
     32 ; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
     33 ; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
     34 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm7
     35 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm5
     36 ; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
     37 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
     38 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
     39 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm7, %xmm5
     40 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
     41 ; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
     42 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
     43 ; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
     44 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm6
     45 ; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm4
     46 ; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
     47 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm2
     48 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
     49 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
     50 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
     51 ; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
     52 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
     53 ; SSE2-SSSE3-NEXT:    por %xmm4, %xmm2
     54 ; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
     55 ; SSE2-SSSE3-NEXT:    andps %xmm0, %xmm2
     56 ; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
     57 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
     58 ; SSE2-SSSE3-NEXT:    retq
     59 ;
     60 ; AVX1-LABEL: v4i64:
     61 ; AVX1:       # %bb.0:
     62 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
     63 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
     64 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
     65 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
     66 ; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
     67 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
     68 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
     69 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
     70 ; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
     71 ; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
     72 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
     73 ; AVX1-NEXT:    vmovmskps %xmm0, %eax
     74 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
     75 ; AVX1-NEXT:    vzeroupper
     76 ; AVX1-NEXT:    retq
     77 ;
     78 ; AVX2-LABEL: v4i64:
     79 ; AVX2:       # %bb.0:
     80 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
     81 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
     82 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
     83 ; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
     84 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
     85 ; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
     86 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
     87 ; AVX2-NEXT:    vmovmskps %xmm0, %eax
     88 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
     89 ; AVX2-NEXT:    vzeroupper
     90 ; AVX2-NEXT:    retq
     91 ;
     92 ; AVX512F-LABEL: v4i64:
     93 ; AVX512F:       # %bb.0:
     94 ; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
     95 ; AVX512F-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
     96 ; AVX512F-NEXT:    kmovw %k0, %eax
     97 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
     98 ; AVX512F-NEXT:    vzeroupper
     99 ; AVX512F-NEXT:    retq
    100 ;
    101 ; AVX512BW-LABEL: v4i64:
    102 ; AVX512BW:       # %bb.0:
    103 ; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
    104 ; AVX512BW-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
    105 ; AVX512BW-NEXT:    kmovd %k0, %eax
    106 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    107 ; AVX512BW-NEXT:    vzeroupper
    108 ; AVX512BW-NEXT:    retq
    109   %x0 = icmp sgt <4 x i64> %a, %b
    110   %x1 = icmp sgt <4 x i64> %c, %d
    111   %y = and <4 x i1> %x0, %x1
    112   %res = bitcast <4 x i1> %y to i4
    113   ret i4 %res
    114 }
    115 
    116 define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
        ; Test: lane-wise ordered greater-than (fcmp ogt, %a > %b and %c > %d) on
        ; <4 x double>, AND of the two <4 x i1> results, then a bitcast of the
        ; combined mask to the i4 return value. The expected assembly uses the
        ; less-than compare mnemonics (cmpltpd / vcmpltpd), presumably with the
        ; operands swapped relative to the IR.
    117 ; SSE2-SSSE3-LABEL: v4f64:
    118 ; SSE2-SSSE3:       # %bb.0:
    119 ; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
    120 ; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
    121 ; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
    122 ; SSE2-SSSE3-NEXT:    cmpltpd %xmm5, %xmm7
    123 ; SSE2-SSSE3-NEXT:    cmpltpd %xmm4, %xmm6
    124 ; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
    125 ; SSE2-SSSE3-NEXT:    andps %xmm2, %xmm6
    126 ; SSE2-SSSE3-NEXT:    movmskps %xmm6, %eax
    127 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
    128 ; SSE2-SSSE3-NEXT:    retq
    129 ;
    130 ; AVX12-LABEL: v4f64:
    131 ; AVX12:       # %bb.0:
    132 ; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
    133 ; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
    134 ; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    135 ; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
    136 ; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
    137 ; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
    138 ; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
    139 ; AVX12-NEXT:    vmovmskps %xmm0, %eax
    140 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
    141 ; AVX12-NEXT:    vzeroupper
    142 ; AVX12-NEXT:    retq
    143 ;
    144 ; AVX512F-LABEL: v4f64:
    145 ; AVX512F:       # %bb.0:
    146 ; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
    147 ; AVX512F-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
    148 ; AVX512F-NEXT:    kmovw %k0, %eax
    149 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
    150 ; AVX512F-NEXT:    vzeroupper
    151 ; AVX512F-NEXT:    retq
    152 ;
    153 ; AVX512BW-LABEL: v4f64:
    154 ; AVX512BW:       # %bb.0:
    155 ; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
    156 ; AVX512BW-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
    157 ; AVX512BW-NEXT:    kmovd %k0, %eax
    158 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    159 ; AVX512BW-NEXT:    vzeroupper
    160 ; AVX512BW-NEXT:    retq
    161   %x0 = fcmp ogt <4 x double> %a, %b
    162   %x1 = fcmp ogt <4 x double> %c, %d
    163   %y = and <4 x i1> %x0, %x1
    164   %res = bitcast <4 x i1> %y to i4
    165   ret i4 %res
    166 }
    167 
    168 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
        ; Test: lane-wise signed greater-than (%a > %b and %c > %d) on <16 x i16>,
        ; AND of the two <16 x i1> results, then a bitcast of the combined mask to
        ; the i16 return value. The AVX512F expectations (that run has no
        ; avx512bw) compare into a ymm register and then use vpmovsxwd + vptestmd
        ; to reach a k register; the AVX512BW expectations compare straight into
        ; k registers.
    169 ; SSE2-SSSE3-LABEL: v16i16:
    170 ; SSE2-SSSE3:       # %bb.0:
    171 ; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
    172 ; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
    173 ; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
    174 ; SSE2-SSSE3-NEXT:    pcmpgtw %xmm7, %xmm5
    175 ; SSE2-SSSE3-NEXT:    pcmpgtw %xmm6, %xmm4
    176 ; SSE2-SSSE3-NEXT:    packsswb %xmm5, %xmm4
    177 ; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
    178 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
    179 ; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
    180 ; SSE2-SSSE3-NEXT:    retq
    181 ;
    182 ; AVX1-LABEL: v16i16:
    183 ; AVX1:       # %bb.0:
    184 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
    185 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
    186 ; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm5, %xmm4
    187 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
    188 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
    189 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
    190 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
    191 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm4, %xmm1
    192 ; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
    193 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
    194 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
    195 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
    196 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
    197 ; AVX1-NEXT:    vzeroupper
    198 ; AVX1-NEXT:    retq
    199 ;
    200 ; AVX2-LABEL: v16i16:
    201 ; AVX2:       # %bb.0:
    202 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
    203 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    204 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
    205 ; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm1
    206 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
    207 ; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
    208 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
    209 ; AVX2-NEXT:    vpmovmskb %xmm0, %eax
    210 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
    211 ; AVX2-NEXT:    vzeroupper
    212 ; AVX2-NEXT:    retq
    213 ;
    214 ; AVX512F-LABEL: v16i16:
    215 ; AVX512F:       # %bb.0:
    216 ; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
    217 ; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
    218 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
    219 ; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm0
    220 ; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
    221 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
    222 ; AVX512F-NEXT:    kmovw %k0, %eax
    223 ; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
    224 ; AVX512F-NEXT:    vzeroupper
    225 ; AVX512F-NEXT:    retq
    226 ;
    227 ; AVX512BW-LABEL: v16i16:
    228 ; AVX512BW:       # %bb.0:
    229 ; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1
    230 ; AVX512BW-NEXT:    vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
    231 ; AVX512BW-NEXT:    kmovd %k0, %eax
    232 ; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
    233 ; AVX512BW-NEXT:    vzeroupper
    234 ; AVX512BW-NEXT:    retq
    235   %x0 = icmp sgt <16 x i16> %a, %b
    236   %x1 = icmp sgt <16 x i16> %c, %d
    237   %y = and <16 x i1> %x0, %x1
    238   %res = bitcast <16 x i1> %y to i16
    239   ret i16 %res
    240 }
    241 
    242 define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
        ; Test: lane-wise signed greater-than (%a > %b and %c > %d) on <8 x i32>,
        ; AND of the two <8 x i1> results, then a bitcast of the combined mask to
        ; the i8 return value. The SSE and AVX1/AVX2 expectations narrow the
        ; compare results with packssdw, AND them, narrow again with packsswb and
        ; read the mask with pmovmskb; the AVX512 expectations compare straight
        ; into k registers.
    243 ; SSE2-SSSE3-LABEL: v8i32:
    244 ; SSE2-SSSE3:       # %bb.0:
    245 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
    246 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
    247 ; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
    248 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
    249 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
    250 ; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm4
    251 ; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
    252 ; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm4
    253 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
    254 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
    255 ; SSE2-SSSE3-NEXT:    retq
    256 ;
    257 ; AVX1-LABEL: v8i32:
    258 ; AVX1:       # %bb.0:
    259 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
    260 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
    261 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
    262 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
    263 ; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
    264 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
    265 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
    266 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm4, %xmm1
    267 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
    268 ; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
    269 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
    270 ; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
    271 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
    272 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
    273 ; AVX1-NEXT:    vzeroupper
    274 ; AVX1-NEXT:    retq
    275 ;
    276 ; AVX2-LABEL: v8i32:
    277 ; AVX2:       # %bb.0:
    278 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
    279 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    280 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    281 ; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm1
    282 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
    283 ; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
    284 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
    285 ; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
    286 ; AVX2-NEXT:    vpmovmskb %xmm0, %eax
    287 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
    288 ; AVX2-NEXT:    vzeroupper
    289 ; AVX2-NEXT:    retq
    290 ;
    291 ; AVX512F-LABEL: v8i32:
    292 ; AVX512F:       # %bb.0:
    293 ; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
    294 ; AVX512F-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
    295 ; AVX512F-NEXT:    kmovw %k0, %eax
    296 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
    297 ; AVX512F-NEXT:    vzeroupper
    298 ; AVX512F-NEXT:    retq
    299 ;
    300 ; AVX512BW-LABEL: v8i32:
    301 ; AVX512BW:       # %bb.0:
    302 ; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
    303 ; AVX512BW-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
    304 ; AVX512BW-NEXT:    kmovd %k0, %eax
    305 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    306 ; AVX512BW-NEXT:    vzeroupper
    307 ; AVX512BW-NEXT:    retq
    308   %x0 = icmp sgt <8 x i32> %a, %b
    309   %x1 = icmp sgt <8 x i32> %c, %d
    310   %y = and <8 x i1> %x0, %x1
    311   %res = bitcast <8 x i1> %y to i8
    312   ret i8 %res
    313 }
    314 
    315 define i8 @v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
        ; Test: lane-wise ordered greater-than (fcmp ogt, %a > %b and %c > %d) on
        ; <8 x float>, AND of the two <8 x i1> results, then a bitcast of the
        ; combined mask to the i8 return value. The expected assembly uses the
        ; less-than compare mnemonics (cmpltps / vcmpltps), presumably with the
        ; operands swapped relative to the IR.
    316 ; SSE2-SSSE3-LABEL: v8f32:
    317 ; SSE2-SSSE3:       # %bb.0:
    318 ; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
    319 ; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
    320 ; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
    321 ; SSE2-SSSE3-NEXT:    cmpltps %xmm5, %xmm7
    322 ; SSE2-SSSE3-NEXT:    cmpltps %xmm4, %xmm6
    323 ; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm6
    324 ; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm6
    325 ; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm6
    326 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
    327 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
    328 ; SSE2-SSSE3-NEXT:    retq
    329 ;
    330 ; AVX12-LABEL: v8f32:
    331 ; AVX12:       # %bb.0:
    332 ; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
    333 ; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
    334 ; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    335 ; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm1
    336 ; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
    337 ; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
    338 ; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
    339 ; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
    340 ; AVX12-NEXT:    vpmovmskb %xmm0, %eax
    341 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
    342 ; AVX12-NEXT:    vzeroupper
    343 ; AVX12-NEXT:    retq
    344 ;
    345 ; AVX512F-LABEL: v8f32:
    346 ; AVX512F:       # %bb.0:
    347 ; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k1
    348 ; AVX512F-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
    349 ; AVX512F-NEXT:    kmovw %k0, %eax
    350 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
    351 ; AVX512F-NEXT:    vzeroupper
    352 ; AVX512F-NEXT:    retq
    353 ;
    354 ; AVX512BW-LABEL: v8f32:
    355 ; AVX512BW:       # %bb.0:
    356 ; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k1
    357 ; AVX512BW-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
    358 ; AVX512BW-NEXT:    kmovd %k0, %eax
    359 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    360 ; AVX512BW-NEXT:    vzeroupper
    361 ; AVX512BW-NEXT:    retq
    362   %x0 = fcmp ogt <8 x float> %a, %b
    363   %x1 = fcmp ogt <8 x float> %c, %d
    364   %y = and <8 x i1> %x0, %x1
    365   %res = bitcast <8 x i1> %y to i8
    366   ret i8 %res
    367 }
    368 
    369 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
        ; Test: lane-wise signed greater-than (%a > %b and %c > %d) on <32 x i8>,
        ; AND of the two <32 x i1> results, then a bitcast of the combined mask to
        ; the i32 return value. The SSE2-SSSE3, AVX1 and AVX512F expectations
        ; assemble the i32 from two partial masks (shll $16, then orl); the AVX2
        ; and AVX512BW expectations obtain it with a single vpmovmskb / kmovd.
    370 ; SSE2-SSSE3-LABEL: v32i8:
    371 ; SSE2-SSSE3:       # %bb.0:
    372 ; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
    373 ; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
    374 ; SSE2-SSSE3-NEXT:    pcmpgtb %xmm6, %xmm4
    375 ; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
    376 ; SSE2-SSSE3-NEXT:    pcmpgtb %xmm7, %xmm5
    377 ; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
    378 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %ecx
    379 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %eax
    380 ; SSE2-SSSE3-NEXT:    shll $16, %eax
    381 ; SSE2-SSSE3-NEXT:    orl %ecx, %eax
    382 ; SSE2-SSSE3-NEXT:    retq
    383 ;
    384 ; AVX1-LABEL: v32i8:
    385 ; AVX1:       # %bb.0:
    386 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
    387 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
    388 ; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm5, %xmm4
    389 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
    390 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
    391 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
    392 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm5, %xmm1
    393 ; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
    394 ; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
    395 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
    396 ; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
    397 ; AVX1-NEXT:    vpmovmskb %xmm1, %eax
    398 ; AVX1-NEXT:    shll $16, %eax
    399 ; AVX1-NEXT:    orl %ecx, %eax
    400 ; AVX1-NEXT:    vzeroupper
    401 ; AVX1-NEXT:    retq
    402 ;
    403 ; AVX2-LABEL: v32i8:
    404 ; AVX2:       # %bb.0:
    405 ; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
    406 ; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm1
    407 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
    408 ; AVX2-NEXT:    vpmovmskb %ymm0, %eax
    409 ; AVX2-NEXT:    vzeroupper
    410 ; AVX2-NEXT:    retq
    411 ;
    412 ; AVX512F-LABEL: v32i8:
    413 ; AVX512F:       # %bb.0:
    414 ; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
    415 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
    416 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
    417 ; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
    418 ; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
    419 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
    420 ; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm0
    421 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
    422 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
    423 ; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
    424 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
    425 ; AVX512F-NEXT:    kmovw %k0, %ecx
    426 ; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
    427 ; AVX512F-NEXT:    kmovw %k0, %eax
    428 ; AVX512F-NEXT:    shll $16, %eax
    429 ; AVX512F-NEXT:    orl %ecx, %eax
    430 ; AVX512F-NEXT:    vzeroupper
    431 ; AVX512F-NEXT:    retq
    432 ;
    433 ; AVX512BW-LABEL: v32i8:
    434 ; AVX512BW:       # %bb.0:
    435 ; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k1
    436 ; AVX512BW-NEXT:    vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
    437 ; AVX512BW-NEXT:    kmovd %k0, %eax
    438 ; AVX512BW-NEXT:    vzeroupper
    439 ; AVX512BW-NEXT:    retq
    440   %x0 = icmp sgt <32 x i8> %a, %b
    441   %x1 = icmp sgt <32 x i8> %c, %d
    442   %y = and <32 x i1> %x0, %x1
    443   %res = bitcast <32 x i1> %y to i32
    444   ret i32 %res
    445 }
    446