; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; This tests fcmp operations that do not map directly to NEON instructions.
;
; Every function below follows the same pattern: load two <2 x float> vectors,
; compare them with a single fcmp predicate, and sign-extend the <2 x i1>
; result to <2 x i32>. The directives in each function list the instruction
; sequence expected for that predicate. Function names are matched with label
; directives so that each expected sequence has to be found inside the body of
; its own function rather than anywhere later in the output.

; une is implemented with VCEQ/VMVN (une is the complement of oeq).
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunef32:
;CHECK: vceq.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; olt is implemented with VCGT (a olt b is equivalent to b ogt a).
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcoltf32:
;CHECK: vcgt.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ole is implemented with VCGE (a ole b is equivalent to b oge a).
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcolef32:
;CHECK: vcge.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; uge is implemented with VCGT/VMVN (uge is the complement of olt).
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ule is implemented with VCGT/VMVN (ule is the complement of ogt).
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vculef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ugt is implemented with VCGE/VMVN (ugt is the complement of ole).
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugtf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ult is implemented with VCGE/VMVN (ult is the complement of oge).
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcultf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ueq is implemented with VCGT/VCGT/VORR/VMVN (ueq is the complement of one,
; and one is "ogt or olt").
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcueqf32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; one is implemented with VCGT/VCGT/VORR (one is "ogt or olt").
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vconef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; uno is implemented with VCGE/VCGT/VORR/VMVN (uno is the complement of ord).
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunof32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ord is implemented with VCGE/VCGT/VORR (ord is "oge or olt").
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcordf32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}