; Codegen test for x86-64 vector integer compares (icmp + sext) with SSE2, SSE4.1 and AVX.
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse4.1 | FileCheck %s -check-prefix=SSE41
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX
      4 
      5 define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
      6   %cmp = icmp uge <16 x i8> %a, %b          ; per-lane unsigned a >= b
      7   %mask = sext <16 x i1> %cmp to <16 x i8>  ; true lanes become 0xff, false lanes 0
      8   ret <16 x i8> %mask                       ; every target is expected to use unsigned max + byte equality
      9 ; SSE2-LABEL: v16i8_icmp_uge:
     10 ; SSE2: pmaxub  %xmm0, %xmm1
     11 ; SSE2: pcmpeqb %xmm1, %xmm0
     12 
     13 ; SSE41-LABEL: v16i8_icmp_uge:
     14 ; SSE41: pmaxub  %xmm0, %xmm1
     15 ; SSE41: pcmpeqb %xmm1, %xmm0
     16 
     17 ; AVX-LABEL: v16i8_icmp_uge:
     18 ; AVX: vpmaxub  %xmm1, %xmm0, %xmm1
     19 ; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
     20 }
     21 
     22 define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
     23   %cmp = icmp ule <16 x i8> %a, %b          ; per-lane unsigned a <= b
     24   %mask = sext <16 x i1> %cmp to <16 x i8>  ; true lanes become 0xff, false lanes 0
     25   ret <16 x i8> %mask                       ; every target is expected to use unsigned min + byte equality
     26 ; SSE2-LABEL: v16i8_icmp_ule:
     27 ; SSE2: pminub  %xmm0, %xmm1
     28 ; SSE2: pcmpeqb %xmm1, %xmm0
     29 
     30 ; SSE41-LABEL: v16i8_icmp_ule:
     31 ; SSE41: pminub  %xmm0, %xmm1
     32 ; SSE41: pcmpeqb %xmm1, %xmm0
     33 
     34 ; AVX-LABEL: v16i8_icmp_ule:
     35 ; AVX: vpminub  %xmm1, %xmm0, %xmm1
     36 ; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
     37 }
     38 
     39 
     40 define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
     41   %1 = icmp uge <8 x i16> %a, %b        ; per-lane unsigned a >= b
     42   %2 = sext <8 x i1> %1 to <8 x i16>    ; true lanes become 0xffff, false lanes 0
     43   ret <8 x i16> %2                      ; the sse2 run expects saturating-subtract, zero, compare-equal; the others expect unsigned max
     44 ; SSE2-LABEL: v8i16_icmp_uge:
     45 ; SSE2: psubusw %xmm0, %xmm1
     46 ; SSE2: pxor    %xmm0, %xmm0
     47 ; SSE2: pcmpeqw %xmm1, %xmm0
     48 
     49 ; SSE41-LABEL: v8i16_icmp_uge:
     50 ; SSE41: pmaxuw  %xmm0, %xmm1
     51 ; SSE41: pcmpeqw %xmm1, %xmm0
     52 
     53 ; AVX-LABEL: v8i16_icmp_uge:
     54 ; AVX: vpmaxuw  %xmm1, %xmm0, %xmm1
     55 ; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
     56 }
     57 
     58 define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
     59   %cmp = icmp ule <8 x i16> %a, %b          ; per-lane unsigned a <= b
     60   %mask = sext <8 x i1> %cmp to <8 x i16>   ; true lanes become 0xffff, false lanes 0
     61   ret <8 x i16> %mask                       ; the sse2 run expects saturating-subtract, zero, compare-equal; the others expect unsigned min
     62 ; SSE2-LABEL: v8i16_icmp_ule:
     63 ; SSE2: psubusw %xmm1, %xmm0
     64 ; SSE2: pxor    %xmm1, %xmm1
     65 ; SSE2: pcmpeqw %xmm1, %xmm0
     66 
     67 ; SSE41-LABEL: v8i16_icmp_ule:
     68 ; SSE41: pminuw  %xmm0, %xmm1
     69 ; SSE41: pcmpeqw %xmm1, %xmm0
     70 
     71 ; AVX-LABEL: v8i16_icmp_ule:
     72 ; AVX: vpminuw  %xmm1, %xmm0, %xmm1
     73 ; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
     74 }
     75 
     76 
     77 define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
     78   %cmp = icmp uge <4 x i32> %a, %b          ; per-lane unsigned a >= b
     79   %mask = sext <4 x i1> %cmp to <4 x i32>   ; true lanes become 0xffffffff, false lanes 0
     80   ret <4 x i32> %mask                       ; the sse2 run expects xor with a loaded constant, pcmpgtd, then inversion; the others expect unsigned max
     81 ; SSE2-LABEL: v4i32_icmp_uge:
     82 ; SSE2: movdqa  {{.*}}(%rip), %xmm2
     83 ; SSE2: pxor    %xmm2, %xmm0
     84 ; SSE2: pxor    %xmm1, %xmm2
     85 ; SSE2: pcmpgtd %xmm0, %xmm2
     86 ; SSE2: pcmpeqd %xmm0, %xmm0
     87 ; SSE2: pxor    %xmm2, %xmm0
     88 
     89 ; SSE41-LABEL: v4i32_icmp_uge:
     90 ; SSE41: pmaxud  %xmm0, %xmm1
     91 ; SSE41: pcmpeqd %xmm1, %xmm0
     92 
     93 ; AVX-LABEL: v4i32_icmp_uge:
     94 ; AVX: vpmaxud  %xmm1, %xmm0, %xmm1
     95 ; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
     96 }
     97 
     98 define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
     99   %1 = icmp ule <4 x i32> %a, %b        ; per-lane unsigned a <= b
    100   %2 = sext <4 x i1> %1 to <4 x i32>    ; true lanes become 0xffffffff, false lanes 0
    101   ret <4 x i32> %2                      ; the sse2 run expects xor with a loaded constant, pcmpgtd, then inversion; the others expect unsigned min
    102 ; SSE2-LABEL: v4i32_icmp_ule:
    103 ; SSE2: movdqa  {{.*}}(%rip), %xmm2
    104 ; SSE2: pxor    %xmm2, %xmm1
    105 ; SSE2: pxor    %xmm2, %xmm0
    106 ; SSE2: pcmpgtd %xmm1, %xmm0
    107 ; SSE2: pcmpeqd %xmm1, %xmm1
    108 ; SSE2: pxor    %xmm1, %xmm0
    109 
    110 ; SSE41-LABEL: v4i32_icmp_ule:
    111 ; SSE41: pminud  %xmm0, %xmm1
    112 ; SSE41: pcmpeqd %xmm1, %xmm0
    113 
    114 ; AVX-LABEL: v4i32_icmp_ule:
    115 ; AVX: vpminud  %xmm1, %xmm0, %xmm1
    116 ; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
    117 }
    118 
    119 ; At one point we were incorrectly constant-folding a setcc to 0x1 instead of
    120 ; 0xff, leading to a constpool load. The instruction doesn't matter here, but it
    121 ; should set all bits to 1.
    122 define <16 x i8> @test_setcc_constfold_vi8(<16 x i8> %l, <16 x i8> %r) {
    123   %is_eq = icmp eq <16 x i8> %l, %r
    124   %eq_mask = sext <16 x i1> %is_eq to <16 x i8>
    125 
    126   %is_ne = icmp ne <16 x i8> %l, %r
    127   %ne_mask = sext <16 x i1> %is_ne to <16 x i8>
    128 
    129   %all_ones = or <16 x i8> %eq_mask, %ne_mask   ; eq | ne holds in every lane, so each byte is 0xff
    130   ret <16 x i8> %all_ones
    131 ; SSE2-LABEL: test_setcc_constfold_vi8:
    132 ; SSE2: pcmpeqd %xmm0, %xmm0
    133 
    134 ; SSE41-LABEL: test_setcc_constfold_vi8:
    135 ; SSE41: pcmpeqd %xmm0, %xmm0
    136 
    137 ; AVX-LABEL: test_setcc_constfold_vi8:
    138 ; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
    139 }
    140 
    141 ; Make sure sensible results come from doing extension afterwards
    142 define <16 x i8> @test_setcc_constfold_vi1(<16 x i8> %l, <16 x i8> %r) {
    143   %is_eq = icmp eq <16 x i8> %l, %r
    144   %is_ne = icmp ne <16 x i8> %l, %r
    145 
    146   %always = or <16 x i1> %is_eq, %is_ne         ; combined while still i1, so every lane is true
    147   %wide = sext <16 x i1> %always to <16 x i8>   ; extension happens after the or in this variant
    148   ret <16 x i8> %wide
    149 ; SSE2-LABEL: test_setcc_constfold_vi1:
    150 ; SSE2: pcmpeqd %xmm0, %xmm0
    151 
    152 ; SSE41-LABEL: test_setcc_constfold_vi1:
    153 ; SSE41: pcmpeqd %xmm0, %xmm0
    154 
    155 ; AVX-LABEL: test_setcc_constfold_vi1:
    156 ; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
    157 }
    158 
    159 
    160 ; The 64-bit case is also particularly important, as the constant "-1" is
    161 ; probably just 32 bits wide.
    162 define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) {
    163   %is_eq = icmp eq <2 x i64> %l, %r
    164   %eq_mask = sext <2 x i1> %is_eq to <2 x i64>
    165 
    166   %is_ne = icmp ne <2 x i64> %l, %r
    167   %ne_mask = sext <2 x i1> %is_ne to <2 x i64>
    168 
    169   %all_ones = or <2 x i64> %eq_mask, %ne_mask   ; eq | ne holds in every lane, so each i64 is all-ones
    170   ret <2 x i64> %all_ones
    171 ; SSE2-LABEL: test_setcc_constfold_vi64:
    172 ; SSE2: pcmpeqd %xmm0, %xmm0
    173 
    174 ; SSE41-LABEL: test_setcc_constfold_vi64:
    175 ; SSE41: pcmpeqd %xmm0, %xmm0
    176 
    177 ; AVX-LABEL: test_setcc_constfold_vi64:
    178 ; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
    179 }
    180