Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512F
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512BW
      8 
      9 define i16 @v16i16(<16 x i16> %a, <16 x i16> %b) {
        ; Lane-wise signed greater-than of two <16 x i16> vectors. The <16 x i1>
        ; compare result is bitcast to i16 and returned as a scalar bitmask.
        ; The autogenerated checks below record the expected llc output for each
        ; RUN configuration: pack + (v)pmovmskb when the compare result stays in a
        ; vector register, sign-extend + vptestmd + kmovw for +avx512f without
        ; +avx512bw, and a compare straight into %k0 + kmovd with +avx512bw.
     10 ; SSE2-SSSE3-LABEL: v16i16:
     11 ; SSE2-SSSE3:       # %bb.0:
     12 ; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
     13 ; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
     14 ; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
     15 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
     16 ; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
     17 ; SSE2-SSSE3-NEXT:    retq
     18 ;
     19 ; AVX1-LABEL: v16i16:
     20 ; AVX1:       # %bb.0:
     21 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
     22 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
     23 ; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
     24 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
     25 ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
     26 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
     27 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
     28 ; AVX1-NEXT:    vzeroupper
     29 ; AVX1-NEXT:    retq
     30 ;
     31 ; AVX2-LABEL: v16i16:
     32 ; AVX2:       # %bb.0:
     33 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
     34 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
     35 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
     36 ; AVX2-NEXT:    vpmovmskb %xmm0, %eax
     37 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
     38 ; AVX2-NEXT:    vzeroupper
     39 ; AVX2-NEXT:    retq
     40 ;
     41 ; AVX512F-LABEL: v16i16:
     42 ; AVX512F:       # %bb.0:
     43 ; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
     44 ; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
     45 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
     46 ; AVX512F-NEXT:    kmovw %k0, %eax
     47 ; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
     48 ; AVX512F-NEXT:    vzeroupper
     49 ; AVX512F-NEXT:    retq
     50 ;
     51 ; AVX512BW-LABEL: v16i16:
     52 ; AVX512BW:       # %bb.0:
     53 ; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
     54 ; AVX512BW-NEXT:    kmovd %k0, %eax
     55 ; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
     56 ; AVX512BW-NEXT:    vzeroupper
     57 ; AVX512BW-NEXT:    retq
     58   %x = icmp sgt <16 x i16> %a, %b
     59   %res = bitcast <16 x i1> %x to i16
     60   ret i16 %res
     61 }
     62 
     63 define i8 @v8i32(<8 x i32> %a, <8 x i32> %b) {
        ; Lane-wise signed greater-than of two <8 x i32> vectors. The <8 x i1>
        ; compare result is bitcast to i8 and returned as a scalar bitmask.
        ; Expected lowerings recorded below: packssdw + packsswb + pmovmskb for the
        ; pre-AVX runs, vmovmskps on a ymm register for the AVX runs, and a compare
        ; into %k0 read back with kmovw / kmovd for the AVX-512 runs.
     64 ; SSE2-SSSE3-LABEL: v8i32:
     65 ; SSE2-SSSE3:       # %bb.0:
     66 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
     67 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
     68 ; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
     69 ; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
     70 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
     71 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
     72 ; SSE2-SSSE3-NEXT:    retq
     73 ;
     74 ; AVX1-LABEL: v8i32:
     75 ; AVX1:       # %bb.0:
     76 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
     77 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
     78 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
     79 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
     80 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
     81 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
     82 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
     83 ; AVX1-NEXT:    vzeroupper
     84 ; AVX1-NEXT:    retq
     85 ;
     86 ; AVX2-LABEL: v8i32:
     87 ; AVX2:       # %bb.0:
     88 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
     89 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
     90 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
     91 ; AVX2-NEXT:    vzeroupper
     92 ; AVX2-NEXT:    retq
     93 ;
     94 ; AVX512F-LABEL: v8i32:
     95 ; AVX512F:       # %bb.0:
     96 ; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
     97 ; AVX512F-NEXT:    kmovw %k0, %eax
     98 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
     99 ; AVX512F-NEXT:    vzeroupper
    100 ; AVX512F-NEXT:    retq
    101 ;
    102 ; AVX512BW-LABEL: v8i32:
    103 ; AVX512BW:       # %bb.0:
    104 ; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
    105 ; AVX512BW-NEXT:    kmovd %k0, %eax
    106 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    107 ; AVX512BW-NEXT:    vzeroupper
    108 ; AVX512BW-NEXT:    retq
    109   %x = icmp sgt <8 x i32> %a, %b
    110   %res = bitcast <8 x i1> %x to i8
    111   ret i8 %res
    112 }
    113 
    114 define i8 @v8f32(<8 x float> %a, <8 x float> %b) {
        ; Lane-wise ordered greater-than (fcmp ogt) of two <8 x float> vectors. The
        ; <8 x i1> compare result is bitcast to i8 and returned as a scalar bitmask.
        ; The expected code uses a less-than compare (cmpltps / vcmpltps).
        ; NOTE(review): presumably the operands are swapped so that b < a stands in
        ; for a > b - confirm against the argument register assignment.
        ; Both AVX runs share one set of checks here (the common AVX12 prefix).
    115 ; SSE2-SSSE3-LABEL: v8f32:
    116 ; SSE2-SSSE3:       # %bb.0:
    117 ; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
    118 ; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
    119 ; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
    120 ; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm2
    121 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
    122 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
    123 ; SSE2-SSSE3-NEXT:    retq
    124 ;
    125 ; AVX12-LABEL: v8f32:
    126 ; AVX12:       # %bb.0:
    127 ; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
    128 ; AVX12-NEXT:    vmovmskps %ymm0, %eax
    129 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
    130 ; AVX12-NEXT:    vzeroupper
    131 ; AVX12-NEXT:    retq
    132 ;
    133 ; AVX512F-LABEL: v8f32:
    134 ; AVX512F:       # %bb.0:
    135 ; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k0
    136 ; AVX512F-NEXT:    kmovw %k0, %eax
    137 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
    138 ; AVX512F-NEXT:    vzeroupper
    139 ; AVX512F-NEXT:    retq
    140 ;
    141 ; AVX512BW-LABEL: v8f32:
    142 ; AVX512BW:       # %bb.0:
    143 ; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k0
    144 ; AVX512BW-NEXT:    kmovd %k0, %eax
    145 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    146 ; AVX512BW-NEXT:    vzeroupper
    147 ; AVX512BW-NEXT:    retq
    148   %x = fcmp ogt <8 x float> %a, %b
    149   %res = bitcast <8 x i1> %x to i8
    150   ret i8 %res
    151 }
    152 
    153 define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) {
        ; Lane-wise signed greater-than of two <32 x i8> vectors. The <32 x i1>
        ; compare result is bitcast to i32 and returned as a scalar bitmask.
        ; In the pre-AVX, +avx and +avx512f-only checks the mask is produced as two
        ; 16-bit halves that are merged with shll $16 / orl. The +avx2 checks use a
        ; single ymm vpmovmskb, and the +avx512bw checks compare into %k0 and read
        ; it with kmovd.
    154 ; SSE2-SSSE3-LABEL: v32i8:
    155 ; SSE2-SSSE3:       # %bb.0:
    156 ; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
    157 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
    158 ; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
    159 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
    160 ; SSE2-SSSE3-NEXT:    shll $16, %eax
    161 ; SSE2-SSSE3-NEXT:    orl %ecx, %eax
    162 ; SSE2-SSSE3-NEXT:    retq
    163 ;
    164 ; AVX1-LABEL: v32i8:
    165 ; AVX1:       # %bb.0:
    166 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm2
    167 ; AVX1-NEXT:    vpmovmskb %xmm2, %ecx
    168 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
    169 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    170 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
    171 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
    172 ; AVX1-NEXT:    shll $16, %eax
    173 ; AVX1-NEXT:    orl %ecx, %eax
    174 ; AVX1-NEXT:    vzeroupper
    175 ; AVX1-NEXT:    retq
    176 ;
    177 ; AVX2-LABEL: v32i8:
    178 ; AVX2:       # %bb.0:
    179 ; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
    180 ; AVX2-NEXT:    vpmovmskb %ymm0, %eax
    181 ; AVX2-NEXT:    vzeroupper
    182 ; AVX2-NEXT:    retq
    183 ;
    184 ; AVX512F-LABEL: v32i8:
    185 ; AVX512F:       # %bb.0:
    186 ; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
    187 ; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
    188 ; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
    189 ; AVX512F-NEXT:    kmovw %k0, %ecx
    190 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
    191 ; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
    192 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
    193 ; AVX512F-NEXT:    kmovw %k0, %eax
    194 ; AVX512F-NEXT:    shll $16, %eax
    195 ; AVX512F-NEXT:    orl %ecx, %eax
    196 ; AVX512F-NEXT:    vzeroupper
    197 ; AVX512F-NEXT:    retq
    198 ;
    199 ; AVX512BW-LABEL: v32i8:
    200 ; AVX512BW:       # %bb.0:
    201 ; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
    202 ; AVX512BW-NEXT:    kmovd %k0, %eax
    203 ; AVX512BW-NEXT:    vzeroupper
    204 ; AVX512BW-NEXT:    retq
    205   %x = icmp sgt <32 x i8> %a, %b
    206   %res = bitcast <32 x i1> %x to i32
    207   ret i32 %res
    208 }
    209 
    210 define i4 @v4i64(<4 x i64> %a, <4 x i64> %b) {
        ; Lane-wise signed greater-than of two <4 x i64> vectors. The <4 x i1>
        ; compare result is bitcast to i4 and returned as a scalar bitmask.
        ; In the pre-AVX checks the 64-bit compare is assembled from 32-bit
        ; pcmpgtd / pcmpeqd results combined with pshufd, pand and por, after both
        ; operands are xor-ed with the constant [2147483648,0,2147483648,0]; the
        ; mask is then extracted with packssdw + movmskps.
        ; The AVX checks use vpcmpgtq + vmovmskpd, and the AVX-512 checks compare
        ; into %k0 and read it with kmovw / kmovd.
    211 ; SSE2-SSSE3-LABEL: v4i64:
    212 ; SSE2-SSSE3:       # %bb.0:
    213 ; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
    214 ; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm3
    215 ; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm1
    216 ; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm5
    217 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
    218 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
    219 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
    220 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    221 ; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm1
    222 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
    223 ; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
    224 ; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm2
    225 ; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm0
    226 ; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm1
    227 ; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm1
    228 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
    229 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
    230 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    231 ; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
    232 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    233 ; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
    234 ; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm1
    235 ; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
    236 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
    237 ; SSE2-SSSE3-NEXT:    retq
    238 ;
    239 ; AVX1-LABEL: v4i64:
    240 ; AVX1:       # %bb.0:
    241 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    242 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    243 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
    244 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
    245 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    246 ; AVX1-NEXT:    vmovmskpd %ymm0, %eax
    247 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
    248 ; AVX1-NEXT:    vzeroupper
    249 ; AVX1-NEXT:    retq
    250 ;
    251 ; AVX2-LABEL: v4i64:
    252 ; AVX2:       # %bb.0:
    253 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
    254 ; AVX2-NEXT:    vmovmskpd %ymm0, %eax
    255 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
    256 ; AVX2-NEXT:    vzeroupper
    257 ; AVX2-NEXT:    retq
    258 ;
    259 ; AVX512F-LABEL: v4i64:
    260 ; AVX512F:       # %bb.0:
    261 ; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
    262 ; AVX512F-NEXT:    kmovw %k0, %eax
    263 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
    264 ; AVX512F-NEXT:    vzeroupper
    265 ; AVX512F-NEXT:    retq
    266 ;
    267 ; AVX512BW-LABEL: v4i64:
    268 ; AVX512BW:       # %bb.0:
    269 ; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
    270 ; AVX512BW-NEXT:    kmovd %k0, %eax
    271 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    272 ; AVX512BW-NEXT:    vzeroupper
    273 ; AVX512BW-NEXT:    retq
    274   %x = icmp sgt <4 x i64> %a, %b
    275   %res = bitcast <4 x i1> %x to i4
    276   ret i4 %res
    277 }
    278 
    279 define i4 @v4f64(<4 x double> %a, <4 x double> %b) {
        ; Lane-wise ordered greater-than (fcmp ogt) of two <4 x double> vectors. The
        ; <4 x i1> compare result is bitcast to i4 and returned as a scalar bitmask.
        ; The expected code uses a less-than compare (cmpltpd / vcmpltpd).
        ; NOTE(review): presumably the operands are swapped so that b < a stands in
        ; for a > b - confirm against the argument register assignment.
        ; Both AVX runs share one set of checks here (the common AVX12 prefix).
    280 ; SSE2-SSSE3-LABEL: v4f64:
    281 ; SSE2-SSSE3:       # %bb.0:
    282 ; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
    283 ; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
    284 ; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
    285 ; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
    286 ; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
    287 ; SSE2-SSSE3-NEXT:    retq
    288 ;
    289 ; AVX12-LABEL: v4f64:
    290 ; AVX12:       # %bb.0:
    291 ; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
    292 ; AVX12-NEXT:    vmovmskpd %ymm0, %eax
    293 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
    294 ; AVX12-NEXT:    vzeroupper
    295 ; AVX12-NEXT:    retq
    296 ;
    297 ; AVX512F-LABEL: v4f64:
    298 ; AVX512F:       # %bb.0:
    299 ; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
    300 ; AVX512F-NEXT:    kmovw %k0, %eax
    301 ; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
    302 ; AVX512F-NEXT:    vzeroupper
    303 ; AVX512F-NEXT:    retq
    304 ;
    305 ; AVX512BW-LABEL: v4f64:
    306 ; AVX512BW:       # %bb.0:
    307 ; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
    308 ; AVX512BW-NEXT:    kmovd %k0, %eax
    309 ; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
    310 ; AVX512BW-NEXT:    vzeroupper
    311 ; AVX512BW-NEXT:    retq
    312   %x = fcmp ogt <4 x double> %a, %b
    313   %res = bitcast <4 x i1> %x to i4
    314   ret i4 %res
    315 }
    316