; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; With -fp-contract=fast, a vector fmul whose only use is an fadd/fsub is
; expected to be selected as a single fmla/fmls. Every function carries a
; CHECK-LABEL so that its check cannot be satisfied by an instruction emitted
; for a different function.

define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmla2xfloat:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = fmul <2 x float> %A, %B
  %tmp2 = fadd <2 x float> %C, %tmp1
  ret <2 x float> %tmp2
}

define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmla4xfloat:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %tmp1 = fmul <4 x float> %A, %B
  %tmp2 = fadd <4 x float> %C, %tmp1
  ret <4 x float> %tmp2
}

define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmla2xdouble:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp1 = fmul <2 x double> %A, %B
  %tmp2 = fadd <2 x double> %C, %tmp1
  ret <2 x double> %tmp2
}


; Same pattern with fsub: C - (A * B) is expected to become fmls.

define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmls2xfloat:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = fmul <2 x float> %A, %B
  %tmp2 = fsub <2 x float> %C, %tmp1
  ret <2 x float> %tmp2
}

define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmls4xfloat:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %tmp1 = fmul <4 x float> %A, %B
  %tmp2 = fsub <4 x float> %C, %tmp1
  ret <4 x float> %tmp2
}

define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmls2xdouble:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp1 = fmul <2 x double> %A, %B
  %tmp2 = fsub <2 x double> %C, %tmp1
  ret <2 x double> %tmp2
}


; Another set of tests for when the intrinsic is used.

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

; Direct calls to llvm.fma are expected to select fmla. Every function carries
; a CHECK-LABEL so that its check cannot be satisfied by an instruction emitted
; for a different function.

define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmla2xfloat_fused:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %val
}

define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmla4xfloat_fused:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %val
}

define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmla2xdouble_fused:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %val
}

; llvm.fma with a negated multiplicand is expected to select fmls.
; The negation is written as a subtraction from -0.0.

define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmls2xfloat_fused:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %negA = fsub <2 x float> <float -0.0, float -0.0>, %A
  %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %val
}

define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmls4xfloat_fused:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
  %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %val
}

define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmls2xdouble_fused:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %negA = fsub <2 x double> <double -0.0, double -0.0>, %A
  %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %val
}

declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)

; Calls to llvm.fmuladd are expected to select fmla as well.

define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmuladd2xfloat:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %val
}

define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmuladd4xfloat_fused:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %val
}

define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmuladd2xdouble_fused:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %val
}


; Another set of tests that check the multiply is NOT fused when its result
; has more than one use. The CHECK-LABEL lines bound each CHECK-NOT to the
; body of its own function.

define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmla2xfloati_su:
; CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = fmul <2 x float> %A, %B
  %tmp2 = fadd <2 x float> %C, %tmp1
  ; Second use of %tmp1.
  %tmp3 = fadd <2 x float> %tmp2, %tmp1
  ret <2 x float> %tmp3
}

define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmls2xdouble_su:
; CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp1 = fmul <2 x double> %A, %B
  %tmp2 = fsub <2 x double> %C, %tmp1
  ; Second use of %tmp1.
  %tmp3 = fsub <2 x double> %tmp2, %tmp1
  ret <2 x double> %tmp3
}