; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; Check generated fused MAC and MLS.
;
; Each function below builds one multiply/accumulate shape, either from
; separate fmul + fadd/fsub instructions (fused because of -fp-contract=fast)
; or from the llvm.fma intrinsic, and verifies which fused instruction is
; expected in the generated assembly.

; (d1 * d2) + d3, scalar double.
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
;CHECK: fusedMACTest1:
;CHECK: vfma.f64
  %1 = fmul double %d1, %d2
  %2 = fadd double %1, %d3
  ret double %2
}

; (f1 * f2) + f3, scalar float.
define float @fusedMACTest2(float %f1, float %f2, float %f3) {
;CHECK: fusedMACTest2:
;CHECK: vfma.f32
  %1 = fmul float %f1, %f2
  %2 = fadd float %1, %f3
  ret float %2
}

; d1 - (d2 * d3), scalar double.
define double @fusedMACTest3(double %d1, double %d2, double %d3) {
;CHECK: fusedMACTest3:
;CHECK: vfms.f64
  %1 = fmul double %d2, %d3
  %2 = fsub double %d1, %1
  ret double %2
}

; f1 - (f2 * f3), scalar float.
define float @fusedMACTest4(float %f1, float %f2, float %f3) {
;CHECK: fusedMACTest4:
;CHECK: vfms.f32
  %1 = fmul float %f2, %f3
  %2 = fsub float %f1, %1
  ret float %2
}

; -(d1 * d2) - d3, scalar double (negation written as fsub from -0.0).
define double @fusedMACTest5(double %d1, double %d2, double %d3) {
;CHECK: fusedMACTest5:
;CHECK: vfnma.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double -0.0, %1
  %3 = fsub double %2, %d3
  ret double %3
}

; -(f1 * f2) - f3, scalar float.
define float @fusedMACTest6(float %f1, float %f2, float %f3) {
;CHECK: fusedMACTest6:
;CHECK: vfnma.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float -0.0, %1
  %3 = fsub float %2, %f3
  ret float %3
}

; (d1 * d2) - d3, scalar double.
define double @fusedMACTest7(double %d1, double %d2, double %d3) {
;CHECK: fusedMACTest7:
;CHECK: vfnms.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double %1, %d3
  ret double %2
}

; (f1 * f2) - f3, scalar float.
define float @fusedMACTest8(float %f1, float %f2, float %f3) {
;CHECK: fusedMACTest8:
;CHECK: vfnms.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float %1, %f3
  ret float %2
}

; (a * b) + a on <2 x float>.
define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
;CHECK: fusedMACTest9:
;CHECK: vfma.f32
  %mul = fmul <2 x float> %a, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}

; a - (a * b) on <2 x float>.
define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
;CHECK: fusedMACTest10:
;CHECK: vfms.f32
  %mul = fmul <2 x float> %a, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}

; (a * b) + a on <4 x float>.
define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
;CHECK: fusedMACTest11:
;CHECK: vfma.f32
  %mul = fmul <4 x float> %a, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

; a - (a * b) on <4 x float>.
define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
;CHECK: fusedMACTest12:
;CHECK: vfms.f32
  %mul = fmul <4 x float> %a, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}

; llvm.fma(a, b, c) on scalar float.
define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_f32:
; CHECK: vfma.f32
  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
  ret float %tmp1
}

; llvm.fma(a, b, c) on scalar double.
define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_f64:
; CHECK: vfma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  ret double %tmp1
}

; llvm.fma(a, b, c) on <2 x float>.
define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_v2f32:
; CHECK: vfma.f32
  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %tmp1
}

; llvm.fma(-a, b, c): negated first multiplicand.
define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fms_f64:
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  ret double %tmp2
}

; llvm.fma(a, -b, c): negated second multiplicand.
define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fms_f64_2:
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  ret double %tmp2
}

; llvm.fma(a, b, -(*c)): the addend is loaded from memory and then negated.
; The function reads memory through %c, so it is marked readonly (readnone
; would contradict the load).
define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readonly ssp {
; CHECK: test_fnms_f32:
; CHECK: vfnms.f32
  %tmp1 = load float* %c, align 4
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
  ret float %tmp3
}

; -(llvm.fma(-a, b, c)): negated first multiplicand and negated result.
define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnms_f64:
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

; -(llvm.fma(a, -b, c)): negated second multiplicand and negated result.
define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnms_f64_2:
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

; -(llvm.fma(a, b, c)): negated result only.
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnma_f64:
; CHECK: vfnma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  %tmp2 = fsub double -0.0, %tmp1
  ret double %tmp2
}

; llvm.fma(-a, b, -c): negated first multiplicand and negated addend.
define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnma_f64_2:
; CHECK: vfnma.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = fsub double -0.0, %c
  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
  ret double %tmp3
}

; llvm.fma(a, 1.0, b): multiplying by 1.0 should fold away, leaving only an
; add (no fused multiply-add and no multiply).
define float @test_fma_const_fold(float %a, float %b) nounwind {
; CHECK: test_fma_const_fold:
; CHECK-NOT: vfma
; CHECK-NOT: vmul
; CHECK: vadd
  %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
  ret float %ret
}

; llvm.fma(2.0, a, b): the constant multiplicand is expected to be
; materialized with vmov and used as the last operand of the vfma.
define float @test_fma_canonicalize(float %a, float %b) nounwind {
; CHECK: test_fma_canonicalize:
; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
  %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
  ret float %ret
}

; Check that very wide vector fma's can be split into legal fma's.
define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp {
; CHECK: test_fma_v8f32:
; CHECK: vfma.f32
; CHECK: vfma.f32
entry:
  %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
  store <8 x float> %call, <8 x float>* %p, align 16
  ret void
}


declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone