Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 ; This tests fcmp operations that do not map directly to NEON instructions.
      4 
      5 ; une (unordered or not equal): expect VCEQ with VMVN on the very next line.
      6 define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
      7 ;CHECK: vcunef32:
      8 ;CHECK: vceq.f32
      9 ;CHECK-NEXT: vmvn
     10 	%lhs = load <2 x float>* %A
     11 	%rhs = load <2 x float>* %B
     12 	%cmp = fcmp une <2 x float> %lhs, %rhs
     13 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     14 	ret <2 x i32> %mask
     15 }
     16 
     17 ; olt (ordered and less than): expect this to be selected as VCGT.
     18 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
     19 ;CHECK: vcoltf32:
     20 ;CHECK: vcgt.f32
     21 	%lhs = load <2 x float>* %A
     22 	%rhs = load <2 x float>* %B
     23 	%cmp = fcmp olt <2 x float> %lhs, %rhs
     24 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     25 	ret <2 x i32> %mask
     26 }
     27 
     28 ; ole (ordered and less than or equal): expect this to be selected as VCGE.
     29 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
     30 ;CHECK: vcolef32:
     31 ;CHECK: vcge.f32
     32 	%lhs = load <2 x float>* %A
     33 	%rhs = load <2 x float>* %B
     34 	%cmp = fcmp ole <2 x float> %lhs, %rhs
     35 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     36 	ret <2 x i32> %mask
     37 }
     38 
     39 ; uge (unordered or greater than or equal): expect VCGT with VMVN on the very next line.
     40 define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
     41 ;CHECK: vcugef32:
     42 ;CHECK: vcgt.f32
     43 ;CHECK-NEXT: vmvn
     44 	%lhs = load <2 x float>* %A
     45 	%rhs = load <2 x float>* %B
     46 	%cmp = fcmp uge <2 x float> %lhs, %rhs
     47 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     48 	ret <2 x i32> %mask
     49 }
     50 
     51 ; ule (unordered or less than or equal): expect VCGT with VMVN on the very next line.
     52 define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
     53 ;CHECK: vculef32:
     54 ;CHECK: vcgt.f32
     55 ;CHECK-NEXT: vmvn
     56 	%lhs = load <2 x float>* %A
     57 	%rhs = load <2 x float>* %B
     58 	%cmp = fcmp ule <2 x float> %lhs, %rhs
     59 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     60 	ret <2 x i32> %mask
     61 }
     62 
     63 ; ugt (unordered or greater than): expect VCGE with VMVN on the very next line.
     64 define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
     65 ;CHECK: vcugtf32:
     66 ;CHECK: vcge.f32
     67 ;CHECK-NEXT: vmvn
     68 	%lhs = load <2 x float>* %A
     69 	%rhs = load <2 x float>* %B
     70 	%cmp = fcmp ugt <2 x float> %lhs, %rhs
     71 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     72 	ret <2 x i32> %mask
     73 }
     74 
     75 ; ult (unordered or less than): expect VCGE with VMVN on the very next line.
     76 define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
     77 ;CHECK: vcultf32:
     78 ;CHECK: vcge.f32
     79 ;CHECK-NEXT: vmvn
     80 	%lhs = load <2 x float>* %A
     81 	%rhs = load <2 x float>* %B
     82 	%cmp = fcmp ult <2 x float> %lhs, %rhs
     83 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     84 	ret <2 x i32> %mask
     85 }
     86 
     87 ; ueq (unordered or equal): expect VCGT, VCGT, VORR, VMVN on consecutive lines.
     88 define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
     89 ;CHECK: vcueqf32:
     90 ;CHECK: vcgt.f32
     91 ;CHECK-NEXT: vcgt.f32
     92 ;CHECK-NEXT: vorr
     93 ;CHECK-NEXT: vmvn
     94 	%lhs = load <2 x float>* %A
     95 	%rhs = load <2 x float>* %B
     96 	%cmp = fcmp ueq <2 x float> %lhs, %rhs
     97 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
     98 	ret <2 x i32> %mask
     99 }
    100 
    101 ; one (ordered and not equal): expect VCGT, VCGT, VORR on consecutive lines.
    102 define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
    103 ;CHECK: vconef32:
    104 ;CHECK: vcgt.f32
    105 ;CHECK-NEXT: vcgt.f32
    106 ;CHECK-NEXT: vorr
    107 	%lhs = load <2 x float>* %A
    108 	%rhs = load <2 x float>* %B
    109 	%cmp = fcmp one <2 x float> %lhs, %rhs
    110 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
    111 	ret <2 x i32> %mask
    112 }
    113 
    114 ; uno (unordered): expect VCGE, VCGT, VORR, VMVN on consecutive lines, in that order.
    115 define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
    116 ;CHECK: vcunof32:
    117 ;CHECK: vcge.f32
    118 ;CHECK-NEXT: vcgt.f32
    119 ;CHECK-NEXT: vorr
    120 ;CHECK-NEXT: vmvn
    121 	%lhs = load <2 x float>* %A
    122 	%rhs = load <2 x float>* %B
    123 	%cmp = fcmp uno <2 x float> %lhs, %rhs
    124 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
    125 	ret <2 x i32> %mask
    126 }
    127 
    128 ; ord (ordered): expect VCGE, VCGT, VORR on consecutive lines, in that order.
    129 define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
    130 ;CHECK: vcordf32:
    131 ;CHECK: vcge.f32
    132 ;CHECK-NEXT: vcgt.f32
    133 ;CHECK-NEXT: vorr
    134 	%lhs = load <2 x float>* %A
    135 	%rhs = load <2 x float>* %B
    136 	%cmp = fcmp ord <2 x float> %lhs, %rhs
    137 	%mask = sext <2 x i1> %cmp to <2 x i32>  ; true lanes become all-ones
    138 	ret <2 x i32> %mask
    139 }
    140