; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

      3 define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      4 ;CHECK: vcges8:
      5 ;CHECK: vcge.s8
      6 	%tmp1 = load <8 x i8>* %A
      7 	%tmp2 = load <8 x i8>* %B
      8 	%tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
      9         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
     10 	ret <8 x i8> %tmp4
     11 }
     12 
     13 define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     14 ;CHECK: vcges16:
     15 ;CHECK: vcge.s16
     16 	%tmp1 = load <4 x i16>* %A
     17 	%tmp2 = load <4 x i16>* %B
     18 	%tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
     19         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
     20 	ret <4 x i16> %tmp4
     21 }
     22 
     23 define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     24 ;CHECK: vcges32:
     25 ;CHECK: vcge.s32
     26 	%tmp1 = load <2 x i32>* %A
     27 	%tmp2 = load <2 x i32>* %B
     28 	%tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
     29         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
     30 	ret <2 x i32> %tmp4
     31 }
     32 
     33 define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
     34 ;CHECK: vcgeu8:
     35 ;CHECK: vcge.u8
     36 	%tmp1 = load <8 x i8>* %A
     37 	%tmp2 = load <8 x i8>* %B
     38 	%tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
     39         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
     40 	ret <8 x i8> %tmp4
     41 }
     42 
     43 define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     44 ;CHECK: vcgeu16:
     45 ;CHECK: vcge.u16
     46 	%tmp1 = load <4 x i16>* %A
     47 	%tmp2 = load <4 x i16>* %B
     48 	%tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
     49         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
     50 	ret <4 x i16> %tmp4
     51 }
     52 
     53 define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     54 ;CHECK: vcgeu32:
     55 ;CHECK: vcge.u32
     56 	%tmp1 = load <2 x i32>* %A
     57 	%tmp2 = load <2 x i32>* %B
     58 	%tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
     59         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
     60 	ret <2 x i32> %tmp4
     61 }
     62 
     63 define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
     64 ;CHECK: vcgef32:
     65 ;CHECK: vcge.f32
     66 	%tmp1 = load <2 x float>* %A
     67 	%tmp2 = load <2 x float>* %B
     68 	%tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
     69         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
     70 	ret <2 x i32> %tmp4
     71 }
     72 
     73 define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     74 ;CHECK: vcgeQs8:
     75 ;CHECK: vcge.s8
     76 	%tmp1 = load <16 x i8>* %A
     77 	%tmp2 = load <16 x i8>* %B
     78 	%tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
     79         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
     80 	ret <16 x i8> %tmp4
     81 }
     82 
     83 define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
     84 ;CHECK: vcgeQs16:
     85 ;CHECK: vcge.s16
     86 	%tmp1 = load <8 x i16>* %A
     87 	%tmp2 = load <8 x i16>* %B
     88 	%tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
     89         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
     90 	ret <8 x i16> %tmp4
     91 }
     92 
     93 define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
     94 ;CHECK: vcgeQs32:
     95 ;CHECK: vcge.s32
     96 	%tmp1 = load <4 x i32>* %A
     97 	%tmp2 = load <4 x i32>* %B
     98 	%tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
     99         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
    100 	ret <4 x i32> %tmp4
    101 }
    102 
    103 define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    104 ;CHECK: vcgeQu8:
    105 ;CHECK: vcge.u8
    106 	%tmp1 = load <16 x i8>* %A
    107 	%tmp2 = load <16 x i8>* %B
    108 	%tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
    109         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
    110 	ret <16 x i8> %tmp4
    111 }
    112 
    113 define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    114 ;CHECK: vcgeQu16:
    115 ;CHECK: vcge.u16
    116 	%tmp1 = load <8 x i16>* %A
    117 	%tmp2 = load <8 x i16>* %B
    118 	%tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
    119         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
    120 	ret <8 x i16> %tmp4
    121 }
    122 
    123 define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    124 ;CHECK: vcgeQu32:
    125 ;CHECK: vcge.u32
    126 	%tmp1 = load <4 x i32>* %A
    127 	%tmp2 = load <4 x i32>* %B
    128 	%tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
    129         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
    130 	ret <4 x i32> %tmp4
    131 }
    132 
    133 define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
    134 ;CHECK: vcgeQf32:
    135 ;CHECK: vcge.f32
    136 	%tmp1 = load <4 x float>* %A
    137 	%tmp2 = load <4 x float>* %B
    138 	%tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
    139         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
    140 	ret <4 x i32> %tmp4
    141 }
    142 
    143 define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
    144 ;CHECK: vacgef32:
    145 ;CHECK: vacge.f32
    146 	%tmp1 = load <2 x float>* %A
    147 	%tmp2 = load <2 x float>* %B
    148 	%tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
    149 	ret <2 x i32> %tmp3
    150 }
    151 
    152 define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
    153 ;CHECK: vacgeQf32:
    154 ;CHECK: vacge.f32
    155 	%tmp1 = load <4 x float>* %A
    156 	%tmp2 = load <4 x float>* %B
    157 	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
    158 	ret <4 x i32> %tmp3
    159 }
    160 
    161 declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
    162 declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
    163 
    164 define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
    165 ;CHECK: vcgei8Z:
    166 ;CHECK-NOT: vmov
    167 ;CHECK-NOT: vmvn
    168 ;CHECK: vcge.s8
    169 	%tmp1 = load <8 x i8>* %A
    170 	%tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
    171         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
    172 	ret <8 x i8> %tmp4
    173 }
    174 
    175 define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
    176 ;CHECK: vclei8Z:
    177 ;CHECK-NOT: vmov
    178 ;CHECK-NOT: vmvn
    179 ;CHECK: vcle.s8
    180 	%tmp1 = load <8 x i8>* %A
    181 	%tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
    182         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
    183 	ret <8 x i8> %tmp4
    184 }
    185 
    186 ; Radar 8782191
    187 ; Floating-point comparisons against zero produce results with integer
    188 ; elements, not floating-point elements.
    189 define void @test_vclez_fp() nounwind optsize {
    190 ;CHECK: test_vclez_fp
    191 ;CHECK: vcle.f32
    192 entry:
    193   %0 = fcmp ole <4 x float> undef, zeroinitializer
    194   %1 = sext <4 x i1> %0 to <4 x i16>
    195   %2 = add <4 x i16> %1, zeroinitializer
    196   %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    197   %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    198   %5 = trunc <8 x i16> %4 to <8 x i8>
    199   tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1)
    200   unreachable
    201 }
    202 
    203 declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
    204