Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 ; This tests icmp operations that do not map directly to NEON instructions.
      4 ; Not-equal (ne) operations are implemented by VCEQ/VMVN.  Less-than (lt/ult)
      5 ; and less-than-or-equal (le/ule) are implemented by swapping the arguments
      6 ; to VCGT and VCGE.  Test all the operand types for not-equal but only sample
      7 ; the other operations.
      8 
      9 define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
     10 ;CHECK: vcnei8:
     11 ;CHECK: vceq.i8
     12 ;CHECK-NEXT: vmvn
     13 	%tmp1 = load <8 x i8>* %A
     14 	%tmp2 = load <8 x i8>* %B
     15 	%tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
     16         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
     17 	ret <8 x i8> %tmp4
     18 }
     19 
     20 define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     21 ;CHECK: vcnei16:
     22 ;CHECK: vceq.i16
     23 ;CHECK-NEXT: vmvn
     24 	%tmp1 = load <4 x i16>* %A
     25 	%tmp2 = load <4 x i16>* %B
     26 	%tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
     27         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
     28 	ret <4 x i16> %tmp4
     29 }
     30 
     31 define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     32 ;CHECK: vcnei32:
     33 ;CHECK: vceq.i32
     34 ;CHECK-NEXT: vmvn
     35 	%tmp1 = load <2 x i32>* %A
     36 	%tmp2 = load <2 x i32>* %B
     37 	%tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
     38         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
     39 	ret <2 x i32> %tmp4
     40 }
     41 
     42 define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     43 ;CHECK: vcneQi8:
     44 ;CHECK: vceq.i8
     45 ;CHECK-NEXT: vmvn
     46 	%tmp1 = load <16 x i8>* %A
     47 	%tmp2 = load <16 x i8>* %B
     48 	%tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
     49         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
     50 	ret <16 x i8> %tmp4
     51 }
     52 
     53 define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
     54 ;CHECK: vcneQi16:
     55 ;CHECK: vceq.i16
     56 ;CHECK-NEXT: vmvn
     57 	%tmp1 = load <8 x i16>* %A
     58 	%tmp2 = load <8 x i16>* %B
     59 	%tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
     60         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
     61 	ret <8 x i16> %tmp4
     62 }
     63 
     64 define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
     65 ;CHECK: vcneQi32:
     66 ;CHECK: vceq.i32
     67 ;CHECK-NEXT: vmvn
     68 	%tmp1 = load <4 x i32>* %A
     69 	%tmp2 = load <4 x i32>* %B
     70 	%tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
     71         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
     72 	ret <4 x i32> %tmp4
     73 }
     74 
     75 define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     76 ;CHECK: vcltQs8:
     77 ;CHECK: vcgt.s8
     78 	%tmp1 = load <16 x i8>* %A
     79 	%tmp2 = load <16 x i8>* %B
     80 	%tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
     81         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
     82 	ret <16 x i8> %tmp4
     83 }
     84 
     85 define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     86 ;CHECK: vcles16:
     87 ;CHECK: vcge.s16
     88 	%tmp1 = load <4 x i16>* %A
     89 	%tmp2 = load <4 x i16>* %B
     90 	%tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
     91         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
     92 	ret <4 x i16> %tmp4
     93 }
     94 
     95 define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     96 ;CHECK: vcltu16:
     97 ;CHECK: vcgt.u16
     98 	%tmp1 = load <4 x i16>* %A
     99 	%tmp2 = load <4 x i16>* %B
    100 	%tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
    101         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
    102 	ret <4 x i16> %tmp4
    103 }
    104 
    105 define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    106 ;CHECK: vcleQu32:
    107 ;CHECK: vcge.u32
    108 	%tmp1 = load <4 x i32>* %A
    109 	%tmp2 = load <4 x i32>* %B
    110 	%tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
    111         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
    112 	ret <4 x i32> %tmp4
    113 }
    114