Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 ; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
      3 
      4 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {  ; signed greater-than on a 64-bit vector of i8 lanes
      5 ;CHECK: vcgts8:
      6 ;CHECK: vcgt.s8
      7 	%lhs = load <8 x i8>* %A
      8 	%rhs = load <8 x i8>* %B
      9 	%gt = icmp sgt <8 x i8> %lhs, %rhs  ; one i1 per lane
     10 	%mask = sext <8 x i1> %gt to <8 x i8>  ; true lanes widen to all-ones, false lanes to zero
     11 	ret <8 x i8> %mask
     12 }
     13 
     14 define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {  ; signed greater-than on a 64-bit vector of i16 lanes
     15 ;CHECK: vcgts16:
     16 ;CHECK: vcgt.s16
     17 	%lhs = load <4 x i16>* %A
     18 	%rhs = load <4 x i16>* %B
     19 	%gt = icmp sgt <4 x i16> %lhs, %rhs  ; one i1 per lane
     20 	%mask = sext <4 x i1> %gt to <4 x i16>  ; true lanes widen to all-ones, false lanes to zero
     21 	ret <4 x i16> %mask
     22 }
     23 
     24 define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {  ; signed greater-than on a 64-bit vector of i32 lanes
     25 ;CHECK: vcgts32:
     26 ;CHECK: vcgt.s32
     27 	%lhs = load <2 x i32>* %A
     28 	%rhs = load <2 x i32>* %B
     29 	%gt = icmp sgt <2 x i32> %lhs, %rhs  ; one i1 per lane
     30 	%mask = sext <2 x i1> %gt to <2 x i32>  ; true lanes widen to all-ones, false lanes to zero
     31 	ret <2 x i32> %mask
     32 }
     33 
     34 define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {  ; unsigned greater-than on a 64-bit vector of i8 lanes
     35 ;CHECK: vcgtu8:
     36 ;CHECK: vcgt.u8
     37 	%lhs = load <8 x i8>* %A
     38 	%rhs = load <8 x i8>* %B
     39 	%gt = icmp ugt <8 x i8> %lhs, %rhs  ; unsigned predicate, one i1 per lane
     40 	%mask = sext <8 x i1> %gt to <8 x i8>  ; true lanes widen to all-ones, false lanes to zero
     41 	ret <8 x i8> %mask
     42 }
     43 
     44 define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {  ; unsigned greater-than on a 64-bit vector of i16 lanes
     45 ;CHECK: vcgtu16:
     46 ;CHECK: vcgt.u16
     47 	%lhs = load <4 x i16>* %A
     48 	%rhs = load <4 x i16>* %B
     49 	%gt = icmp ugt <4 x i16> %lhs, %rhs  ; unsigned predicate, one i1 per lane
     50 	%mask = sext <4 x i1> %gt to <4 x i16>  ; true lanes widen to all-ones, false lanes to zero
     51 	ret <4 x i16> %mask
     52 }
     53 
     54 define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {  ; unsigned greater-than on a 64-bit vector of i32 lanes
     55 ;CHECK: vcgtu32:
     56 ;CHECK: vcgt.u32
     57 	%lhs = load <2 x i32>* %A
     58 	%rhs = load <2 x i32>* %B
     59 	%gt = icmp ugt <2 x i32> %lhs, %rhs  ; unsigned predicate, one i1 per lane
     60 	%mask = sext <2 x i1> %gt to <2 x i32>  ; true lanes widen to all-ones, false lanes to zero
     61 	ret <2 x i32> %mask
     62 }
     63 
     64 define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {  ; ordered float greater-than on a 64-bit vector, integer mask result
     65 ;CHECK: vcgtf32:
     66 ;CHECK: vcgt.f32
     67 	%lhs = load <2 x float>* %A
     68 	%rhs = load <2 x float>* %B
     69 	%gt = fcmp ogt <2 x float> %lhs, %rhs  ; ordered predicate, one i1 per lane
     70 	%mask = sext <2 x i1> %gt to <2 x i32>  ; result lanes are i32 although the inputs are float
     71 	ret <2 x i32> %mask
     72 }
     73 
     74 define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {  ; signed greater-than on a 128-bit vector of i8 lanes
     75 ;CHECK: vcgtQs8:
     76 ;CHECK: vcgt.s8
     77 	%lhs = load <16 x i8>* %A
     78 	%rhs = load <16 x i8>* %B
     79 	%gt = icmp sgt <16 x i8> %lhs, %rhs  ; one i1 per lane
     80 	%mask = sext <16 x i1> %gt to <16 x i8>  ; true lanes widen to all-ones, false lanes to zero
     81 	ret <16 x i8> %mask
     82 }
     83 
     84 define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {  ; signed greater-than on a 128-bit vector of i16 lanes
     85 ;CHECK: vcgtQs16:
     86 ;CHECK: vcgt.s16
     87 	%lhs = load <8 x i16>* %A
     88 	%rhs = load <8 x i16>* %B
     89 	%gt = icmp sgt <8 x i16> %lhs, %rhs  ; one i1 per lane
     90 	%mask = sext <8 x i1> %gt to <8 x i16>  ; true lanes widen to all-ones, false lanes to zero
     91 	ret <8 x i16> %mask
     92 }
     93 
     94 define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {  ; signed greater-than on a 128-bit vector of i32 lanes
     95 ;CHECK: vcgtQs32:
     96 ;CHECK: vcgt.s32
     97 	%lhs = load <4 x i32>* %A
     98 	%rhs = load <4 x i32>* %B
     99 	%gt = icmp sgt <4 x i32> %lhs, %rhs  ; one i1 per lane
    100 	%mask = sext <4 x i1> %gt to <4 x i32>  ; true lanes widen to all-ones, false lanes to zero
    101 	ret <4 x i32> %mask
    102 }
    103 
    104 define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {  ; unsigned greater-than on a 128-bit vector of i8 lanes
    105 ;CHECK: vcgtQu8:
    106 ;CHECK: vcgt.u8
    107 	%lhs = load <16 x i8>* %A
    108 	%rhs = load <16 x i8>* %B
    109 	%gt = icmp ugt <16 x i8> %lhs, %rhs  ; unsigned predicate, one i1 per lane
    110 	%mask = sext <16 x i1> %gt to <16 x i8>  ; true lanes widen to all-ones, false lanes to zero
    111 	ret <16 x i8> %mask
    112 }
    113 
    114 define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {  ; unsigned greater-than on a 128-bit vector of i16 lanes
    115 ;CHECK: vcgtQu16:
    116 ;CHECK: vcgt.u16
    117 	%lhs = load <8 x i16>* %A
    118 	%rhs = load <8 x i16>* %B
    119 	%gt = icmp ugt <8 x i16> %lhs, %rhs  ; unsigned predicate, one i1 per lane
    120 	%mask = sext <8 x i1> %gt to <8 x i16>  ; true lanes widen to all-ones, false lanes to zero
    121 	ret <8 x i16> %mask
    122 }
    123 
    124 define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {  ; unsigned greater-than on a 128-bit vector of i32 lanes
    125 ;CHECK: vcgtQu32:
    126 ;CHECK: vcgt.u32
    127 	%lhs = load <4 x i32>* %A
    128 	%rhs = load <4 x i32>* %B
    129 	%gt = icmp ugt <4 x i32> %lhs, %rhs  ; unsigned predicate, one i1 per lane
    130 	%mask = sext <4 x i1> %gt to <4 x i32>  ; true lanes widen to all-ones, false lanes to zero
    131 	ret <4 x i32> %mask
    132 }
    133 
    134 define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {  ; ordered float greater-than on a 128-bit vector, integer mask result
    135 ;CHECK: vcgtQf32:
    136 ;CHECK: vcgt.f32
    137 	%lhs = load <4 x float>* %A
    138 	%rhs = load <4 x float>* %B
    139 	%gt = fcmp ogt <4 x float> %lhs, %rhs  ; ordered predicate, one i1 per lane
    140 	%mask = sext <4 x i1> %gt to <4 x i32>  ; result lanes are i32 although the inputs are float
    141 	ret <4 x i32> %mask
    142 }
    143 
    144 define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {  ; exercises the vacgtd intrinsic on <2 x float> operands
    145 ;CHECK: vacgtf32:
    146 ;CHECK: vacgt.f32
    147 	%lhs = load <2 x float>* %A
    148 	%rhs = load <2 x float>* %B
    149 	%mask = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %lhs, <2 x float> %rhs)  ; intrinsic already yields the <2 x i32> result, no sext needed
    150 	ret <2 x i32> %mask
    151 }
    152 
    153 define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {  ; exercises the vacgtq intrinsic on <4 x float> operands
    154 ;CHECK: vacgtQf32:
    155 ;CHECK: vacgt.f32
    156 	%lhs = load <4 x float>* %A
    157 	%rhs = load <4 x float>* %B
    158 	%mask = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %lhs, <4 x float> %rhs)  ; intrinsic already yields the <4 x i32> result, no sext needed
    159 	ret <4 x i32> %mask
    160 }
    161 
    162 ; rdar://7923010
    163 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {  ; same compare as @vcgtQf32 but zero-extended instead of sign-extended
    164 ;CHECK: vcgt_zext:
    165 ;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
    166 ;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
    167 ;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
    168 	%lhs = load <4 x float>* %A
    169 	%rhs = load <4 x float>* %B
    170 	%gt = fcmp ogt <4 x float> %lhs, %rhs  ; ordered predicate, one i1 per lane
    171 	%bit = zext <4 x i1> %gt to <4 x i32>  ; true lanes become 1; the checks above expect the compare result to be and-ed with a splat of #0x1
    172 	ret <4 x i32> %bit
    173 }
    174 
    175 declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone  ; called by @vacgtf32 (<2 x float> operands)
    176 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone  ; called by @vacgtQf32 (<4 x float> operands)
    177 
    178 define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {  ; signed greater-than against an all-zero vector
    179 ;CHECK: vcgti8Z:
    180 ;CHECK-NOT: vmov
    181 ;CHECK-NOT: vmvn
    182 ;CHECK: vcgt.s8
    183 	%val = load <8 x i8>* %A
    184 	%gt = icmp sgt <8 x i8> %val, zeroinitializer  ; the checks above forbid vmov/vmvn between the label and the compare
    185 	%mask = sext <8 x i1> %gt to <8 x i8>  ; true lanes widen to all-ones, false lanes to zero
    186 	ret <8 x i8> %mask
    187 }
    188 
    189 define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {  ; signed less-than against an all-zero vector
    190 ;CHECK: vclti8Z:
    191 ;CHECK-NOT: vmov
    192 ;CHECK-NOT: vmvn
    193 ;CHECK: vclt.s8
    194 	%val = load <8 x i8>* %A
    195 	%lt = icmp slt <8 x i8> %val, zeroinitializer  ; the checks above forbid vmov/vmvn between the label and the compare
    196 	%mask = sext <8 x i1> %lt to <8 x i8>  ; true lanes widen to all-ones, false lanes to zero
    197 	ret <8 x i8> %mask
    198 }
    199