Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse4.1 | FileCheck %s -check-prefix=SSE41
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX
      4 
      5 define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
      6   %ge = icmp uge <16 x i8> %a, %b          ; per-lane unsigned a >= b
      7   %ge.mask = sext <16 x i1> %ge to <16 x i8> ; true lanes -> all-ones, false lanes -> 0
      8   ret <16 x i8> %ge.mask                   ; every target is expected to use max(a, b) == a
      9 ; SSE2-LABEL: v16i8_icmp_uge:
     10 ; SSE2: pmaxub  %xmm0, %xmm1
     11 ; SSE2: pcmpeqb %xmm1, %xmm0
     12 
     13 ; SSE41-LABEL: v16i8_icmp_uge:
     14 ; SSE41: pmaxub  %xmm0, %xmm1
     15 ; SSE41: pcmpeqb %xmm1, %xmm0
     16 
     17 ; AVX-LABEL: v16i8_icmp_uge:
     18 ; AVX: vpmaxub  %xmm1, %xmm0, %xmm1
     19 ; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
     20 }
     21 
     22 define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
     23   %le = icmp ule <16 x i8> %a, %b          ; per-lane unsigned a <= b
     24   %le.mask = sext <16 x i1> %le to <16 x i8> ; true lanes -> all-ones, false lanes -> 0
     25   ret <16 x i8> %le.mask                   ; every target is expected to use min(a, b) == a
     26 ; SSE2-LABEL: v16i8_icmp_ule:
     27 ; SSE2: pminub  %xmm0, %xmm1
     28 ; SSE2: pcmpeqb %xmm1, %xmm0
     29 
     30 ; SSE41-LABEL: v16i8_icmp_ule:
     31 ; SSE41: pminub  %xmm0, %xmm1
     32 ; SSE41: pcmpeqb %xmm1, %xmm0
     33 
     34 ; AVX-LABEL: v16i8_icmp_ule:
     35 ; AVX: vpminub  %xmm1, %xmm0, %xmm1
     36 ; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
     37 }
     38 
     39 
     40 define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
     41   %ge = icmp uge <8 x i16> %a, %b          ; per-lane unsigned a >= b
     42   %ge.mask = sext <8 x i1> %ge to <8 x i16> ; true lanes -> all-ones, false lanes -> 0
     43   ret <8 x i16> %ge.mask                   ; plain SSE2 checks (b -sat a) == 0; the others check max(a, b) == a
     44 ; SSE2-LABEL: v8i16_icmp_uge:
     45 ; SSE2: psubusw %xmm0, %xmm1
     46 ; SSE2: pxor    %xmm0, %xmm0
     47 ; SSE2: pcmpeqw %xmm1, %xmm0
     48 
     49 ; SSE41-LABEL: v8i16_icmp_uge:
     50 ; SSE41: pmaxuw  %xmm0, %xmm1
     51 ; SSE41: pcmpeqw %xmm1, %xmm0
     52 
     53 ; AVX-LABEL: v8i16_icmp_uge:
     54 ; AVX: vpmaxuw  %xmm1, %xmm0, %xmm1
     55 ; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
     56 }
     57 
     58 define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
     59   %le = icmp ule <8 x i16> %a, %b          ; per-lane unsigned a <= b
     60   %le.mask = sext <8 x i1> %le to <8 x i16> ; true lanes -> all-ones, false lanes -> 0
     61   ret <8 x i16> %le.mask                   ; plain SSE2 checks (a -sat b) == 0; the others check min(a, b) == a
     62 ; SSE2-LABEL: v8i16_icmp_ule:
     63 ; SSE2: psubusw %xmm1, %xmm0
     64 ; SSE2: pxor    %xmm1, %xmm1
     65 ; SSE2: pcmpeqw %xmm0, %xmm1
     66 ; SSE2: movdqa  %xmm1, %xmm0
     67 
     68 ; SSE41-LABEL: v8i16_icmp_ule:
     69 ; SSE41: pminuw  %xmm0, %xmm1
     70 ; SSE41: pcmpeqw %xmm1, %xmm0
     71 
     72 ; AVX-LABEL: v8i16_icmp_ule:
     73 ; AVX: vpminuw  %xmm1, %xmm0, %xmm1
     74 ; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
     75 }
     76 
     77 
     78 define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
     79   %ge = icmp uge <4 x i32> %a, %b          ; per-lane unsigned a >= b
     80   %ge.mask = sext <4 x i1> %ge to <4 x i32> ; true lanes -> all-ones, false lanes -> 0
     81   ret <4 x i32> %ge.mask                   ; plain SSE2 checks not(b' > a') with both inputs xor-ed by a constant-pool value (presumably the sign-bit mask; not visible here)
     82 ; SSE2-LABEL: v4i32_icmp_uge:
     83 ; SSE2: movdqa  {{.*}}(%rip), %xmm2
     84 ; SSE2: pxor    %xmm2, %xmm0
     85 ; SSE2: pxor    %xmm1, %xmm2
     86 ; SSE2: pcmpgtd %xmm0, %xmm2
     87 ; SSE2: pcmpeqd %xmm0, %xmm0
     88 ; SSE2: pxor    %xmm2, %xmm0
     89 
     90 ; SSE41-LABEL: v4i32_icmp_uge:
     91 ; SSE41: pmaxud  %xmm0, %xmm1
     92 ; SSE41: pcmpeqd %xmm1, %xmm0
     93 
     94 ; AVX-LABEL: v4i32_icmp_uge:
     95 ; AVX: vpmaxud  %xmm1, %xmm0, %xmm1
     96 ; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
     97 }
     98 
     99 define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
    100   %le = icmp ule <4 x i32> %a, %b          ; per-lane unsigned a <= b
    101   %le.mask = sext <4 x i1> %le to <4 x i32> ; true lanes -> all-ones, false lanes -> 0
    102   ret <4 x i32> %le.mask                   ; plain SSE2 checks not(a' > b') with both inputs xor-ed by a constant-pool value (presumably the sign-bit mask; not visible here)
    103 ; SSE2-LABEL: v4i32_icmp_ule:
    104 ; SSE2: movdqa  {{.*}}(%rip), %xmm2
    105 ; SSE2: pxor    %xmm2, %xmm1
    106 ; SSE2: pxor    %xmm2, %xmm0
    107 ; SSE2: pcmpgtd %xmm1, %xmm0
    108 ; SSE2: pcmpeqd %xmm1, %xmm1
    109 ; SSE2: pxor    %xmm0, %xmm1
    110 ; SSE2: movdqa  %xmm1, %xmm0
    111 
    112 ; SSE41-LABEL: v4i32_icmp_ule:
    113 ; SSE41: pminud  %xmm0, %xmm1
    114 ; SSE41: pcmpeqd %xmm1, %xmm0
    115 
    116 ; AVX-LABEL: v4i32_icmp_ule:
    117 ; AVX: vpminud  %xmm1, %xmm0, %xmm1
    118 ; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
    119 }
    120 
    121 ; At one point we were incorrectly constant-folding a setcc to 0x1 instead of
    122 ; 0xff, leading to a constpool load. The instruction doesn't matter here, but it
    123 ; should set all bits to 1.
    124 define <16 x i8> @test_setcc_constfold_vi8(<16 x i8> %l, <16 x i8> %r) {
    125   %eq = icmp eq <16 x i8> %l, %r
    126   %eq.mask = sext <16 x i1> %eq to <16 x i8>  ; all-ones where l == r
    127 
    128   %ne = icmp ne <16 x i8> %l, %r
    129   %ne.mask = sext <16 x i1> %ne to <16 x i8>  ; all-ones where l != r
    130 
    131   %all = or <16 x i8> %eq.mask, %ne.mask      ; eq | ne holds in every lane, so each byte must be 0xff
    132   ret <16 x i8> %all
    133 ; SSE2-LABEL: test_setcc_constfold_vi8:
    134 ; SSE2: pcmpeqd %xmm0, %xmm0
    135 
    136 ; SSE41-LABEL: test_setcc_constfold_vi8:
    137 ; SSE41: pcmpeqd %xmm0, %xmm0
    138 
    139 ; AVX-LABEL: test_setcc_constfold_vi8:
    140 ; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
    141 }
    142 
    143 ; Make sure sensible results also come from doing the extension afterwards, i.e. when the i1 compare results are or-ed first and only then sign-extended.
    144 define <16 x i8> @test_setcc_constfold_vi1(<16 x i8> %l, <16 x i8> %r) {
    145   %eq = icmp eq <16 x i8> %l, %r
    146   %ne = icmp ne <16 x i8> %l, %r
    147 
    148   %either = or <16 x i1> %eq, %ne              ; combined while still i1; true in every lane
    149   %wide = sext <16 x i1> %either to <16 x i8>  ; extension happens last, so each byte must be all-ones
    150   ret <16 x i8> %wide
    151 ; SSE2-LABEL: test_setcc_constfold_vi1:
    152 ; SSE2: pcmpeqd %xmm0, %xmm0
    153 
    154 ; SSE41-LABEL: test_setcc_constfold_vi1:
    155 ; SSE41: pcmpeqd %xmm0, %xmm0
    156 
    157 ; AVX-LABEL: test_setcc_constfold_vi1:
    158 ; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
    159 }
    160 
    161 
    162 ; The 64-bit case is also particularly important, as the constant "-1" is
    163 ; probably just 32 bits wide.
    164 define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) {
    165   %eq = icmp eq <2 x i64> %l, %r
    166   %eq.mask = sext <2 x i1> %eq to <2 x i64>   ; all-ones where l == r
    167 
    168   %ne = icmp ne <2 x i64> %l, %r
    169   %ne.mask = sext <2 x i1> %ne to <2 x i64>   ; all-ones where l != r
    170 
    171   %all = or <2 x i64> %eq.mask, %ne.mask      ; eq | ne holds in every lane, so all 64 bits of each lane must be set
    172   ret <2 x i64> %all
    173 ; SSE2-LABEL: test_setcc_constfold_vi64:
    174 ; SSE2: pcmpeqd %xmm0, %xmm0
    175 
    176 ; SSE41-LABEL: test_setcc_constfold_vi64:
    177 ; SSE41: pcmpeqd %xmm0, %xmm0
    178 
    179 ; AVX-LABEL: test_setcc_constfold_vi64:
    180 ; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
    181 }
    182