; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Every function below follows the same recipe: load the operand(s), perform a
; lane-wise comparison, sign-extend the <N x i1> result to the element width,
; and return it. The FileCheck patterns only pin the function label and the
; expected compare mnemonic.

; --- Signed integer >=, 64-bit vectors ---------------------------------------

define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vcges8:
;CHECK: vcge.s8
  %lhs = load <8 x i8>* %A
  %rhs = load <8 x i8>* %B
  %cmp = icmp sge <8 x i8> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %mask
}

define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcges16:
;CHECK: vcge.s16
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp sge <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vcges32:
;CHECK: vcge.s32
  %lhs = load <2 x i32>* %A
  %rhs = load <2 x i32>* %B
  %cmp = icmp sge <2 x i32> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; --- Unsigned integer >=, 64-bit vectors -------------------------------------

define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vcgeu8:
;CHECK: vcge.u8
  %lhs = load <8 x i8>* %A
  %rhs = load <8 x i8>* %B
  %cmp = icmp uge <8 x i8> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %mask
}

define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcgeu16:
;CHECK: vcge.u16
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp uge <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vcgeu32:
;CHECK: vcge.u32
  %lhs = load <2 x i32>* %A
  %rhs = load <2 x i32>* %B
  %cmp = icmp uge <2 x i32> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; --- Ordered floating-point >=, 64-bit vector --------------------------------
; The operands are floats, but the returned mask has i32 lanes.

define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcgef32:
;CHECK: vcge.f32
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %cmp = fcmp oge <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; --- Signed integer >=, 128-bit vectors --------------------------------------

define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vcgeQs8:
;CHECK: vcge.s8
  %lhs = load <16 x i8>* %A
  %rhs = load <16 x i8>* %B
  %cmp = icmp sge <16 x i8> %lhs, %rhs
  %mask = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %mask
}

define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vcgeQs16:
;CHECK: vcge.s16
  %lhs = load <8 x i16>* %A
  %rhs = load <8 x i16>* %B
  %cmp = icmp sge <8 x i16> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %mask
}

define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vcgeQs32:
;CHECK: vcge.s32
  %lhs = load <4 x i32>* %A
  %rhs = load <4 x i32>* %B
  %cmp = icmp sge <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}

; --- Unsigned integer >=, 128-bit vectors ------------------------------------

define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vcgeQu8:
;CHECK: vcge.u8
  %lhs = load <16 x i8>* %A
  %rhs = load <16 x i8>* %B
  %cmp = icmp uge <16 x i8> %lhs, %rhs
  %mask = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %mask
}

define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vcgeQu16:
;CHECK: vcge.u16
  %lhs = load <8 x i16>* %A
  %rhs = load <8 x i16>* %B
  %cmp = icmp uge <8 x i16> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %mask
}

define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vcgeQu32:
;CHECK: vcge.u32
  %lhs = load <4 x i32>* %A
  %rhs = load <4 x i32>* %B
  %cmp = icmp uge <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}

; --- Ordered floating-point >=, 128-bit vector -------------------------------

define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vcgeQf32:
;CHECK: vcge.f32
  %lhs = load <4 x float>* %A
  %rhs = load <4 x float>* %B
  %cmp = fcmp oge <4 x float> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}

; --- vacge via the NEON intrinsics -------------------------------------------
; These two call the intrinsics declared below instead of using icmp/fcmp, and
; return the intrinsic's result unchanged.

define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vacgef32:
;CHECK: vacge.f32
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %res = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x i32> %res
}

define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vacgeQf32:
;CHECK: vacge.f32
  %lhs = load <4 x float>* %A
  %rhs = load <4 x float>* %B
  %res = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x i32> %res
}

declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone

; --- Integer compares against an all-zero vector -----------------------------
; The negative patterns require that neither vmov nor vmvn shows up between the
; function label and the compare instruction.
; NOTE(review): presumably this guards against materializing the zero operand
; or inverting a different compare -- confirm against the ARM backend.

define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
;CHECK: vcgei8Z:
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcge.s8
  %val = load <8 x i8>* %A
  %cmp = icmp sge <8 x i8> %val, zeroinitializer
  %mask = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %mask
}

define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
;CHECK: vclei8Z:
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcle.s8
  %val = load <8 x i8>* %A
  %cmp = icmp sle <8 x i8> %val, zeroinitializer
  %mask = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %mask
}

; Radar 8782191
; Floating-point comparisons against zero produce results with integer
; elements, not floating-point elements.
define void @test_vclez_fp() nounwind optsize {
; The label pattern ends with a colon, like the other functions in this file,
; so it anchors on the emitted function label rather than on any earlier
; mention of the symbol name in the assembly output.
;CHECK: test_vclez_fp:
;CHECK: vcle.f32
entry:
  ; Compare an undef float vector against zero; the i1 lanes are then
  ; sign-extended to i16, i.e. the compare result is consumed as integers.
  %0 = fcmp ole <4 x float> undef, zeroinitializer
  %1 = sext <4 x i1> %0 to <4 x i16>
  %2 = add <4 x i16> %1, zeroinitializer
  ; Widen from 4 to 8 lanes; mask indices 4-7 select from the undef operand.
  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  ; The store consumes %5, so the computation above has a user.
  tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1)
  unreachable
}

declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind