; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Vector integer compares that have no one-to-one NEON instruction:
;   * ne        -> VCEQ followed by VMVN (compare for equality, then invert);
;   * lt / ult  -> VCGT with the two operands exchanged;
;   * le / ule  -> VCGE with the two operands exchanged.
; Not-equal is exercised for every operand type below; the other predicates
; are only sampled.
;
; Every function follows the same shape: load both vectors, compare them,
; sign-extend the i1 result so that a true lane becomes all-ones, and return
; that mask.

; ---- not-equal, 64-bit vectors ---------------------------------------------

define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK: vcnei8:
; CHECK: vceq.i8
; CHECK-NEXT: vmvn
  %lhs = load <8 x i8>* %A
  %rhs = load <8 x i8>* %B
  %cmp = icmp ne <8 x i8> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %mask
}

define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK: vcnei16:
; CHECK: vceq.i16
; CHECK-NEXT: vmvn
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp ne <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK: vcnei32:
; CHECK: vceq.i32
; CHECK-NEXT: vmvn
  %lhs = load <2 x i32>* %A
  %rhs = load <2 x i32>* %B
  %cmp = icmp ne <2 x i32> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ---- not-equal, 128-bit vectors --------------------------------------------

define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK: vcneQi8:
; CHECK: vceq.i8
; CHECK-NEXT: vmvn
  %lhs = load <16 x i8>* %A
  %rhs = load <16 x i8>* %B
  %cmp = icmp ne <16 x i8> %lhs, %rhs
  %mask = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %mask
}

define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK: vcneQi16:
; CHECK: vceq.i16
; CHECK-NEXT: vmvn
  %lhs = load <8 x i16>* %A
  %rhs = load <8 x i16>* %B
  %cmp = icmp ne <8 x i16> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %mask
}

define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK: vcneQi32:
; CHECK: vceq.i32
; CHECK-NEXT: vmvn
  %lhs = load <4 x i32>* %A
  %rhs = load <4 x i32>* %B
  %cmp = icmp ne <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}

; ---- sampled less-than / less-or-equal -------------------------------------

; Signed less-than on a 128-bit vector of i8; expected to come out as VCGT.s8.
define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK: vcltQs8:
; CHECK: vcgt.s8
  %lhs = load <16 x i8>* %A
  %rhs = load <16 x i8>* %B
  %cmp = icmp slt <16 x i8> %lhs, %rhs
  %mask = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %mask
}

; Signed less-or-equal on a 64-bit vector of i16; expected to come out as
; VCGE.s16.
define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK: vcles16:
; CHECK: vcge.s16
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp sle <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

; Unsigned less-than on a 64-bit vector of i16; expected to come out as
; VCGT.u16.
define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK: vcltu16:
; CHECK: vcgt.u16
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp ult <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

; Unsigned less-or-equal on a 128-bit vector of i32; expected to come out as
; VCGE.u32.
define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK: vcleQu32:
; CHECK: vcge.u32
  %lhs = load <4 x i32>* %A
  %rhs = load <4 x i32>* %B
  %cmp = icmp ule <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}