; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
      2 
      3 define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
      4 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
      5 	%tmp1 = fmul <2 x float> %A, %B;
      6 	%tmp2 = fadd <2 x float> %C, %tmp1;
      7 	ret <2 x float> %tmp2
      8 }
      9 
     10 define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     11 ;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     12 	%tmp1 = fmul <4 x float> %A, %B;
     13 	%tmp2 = fadd <4 x float> %C, %tmp1;
     14 	ret <4 x float> %tmp2
     15 }
     16 
     17 define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     18 ;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     19 	%tmp1 = fmul <2 x double> %A, %B;
     20 	%tmp2 = fadd <2 x double> %C, %tmp1;
     21 	ret <2 x double> %tmp2
     22 }
     23 
     24 
     25 define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     26 ;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     27 	%tmp1 = fmul <2 x float> %A, %B;
     28 	%tmp2 = fsub <2 x float> %C, %tmp1;
     29 	ret <2 x float> %tmp2
     30 }
     31 
     32 define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     33 ;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     34 	%tmp1 = fmul <4 x float> %A, %B;
     35 	%tmp2 = fsub <4 x float> %C, %tmp1;
     36 	ret <4 x float> %tmp2
     37 }
     38 
     39 define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     40 ;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     41 	%tmp1 = fmul <2 x double> %A, %B;
     42 	%tmp2 = fsub <2 x double> %C, %tmp1;
     43 	ret <2 x double> %tmp2
     44 }
     45 
     46 
; Another set of tests, this time calling the fused multiply-add intrinsics directly.
     48 
     49 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
     50 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
     51 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
     52 
     53 define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     54 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     55         %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
     56 	ret <2 x float> %val
     57 }
     58 
     59 define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     60 ;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     61         %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
     62 	ret <4 x float> %val
     63 }
     64 
     65 define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     66 ;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     67         %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
     68 	ret <2 x double> %val
     69 }
     70 
     71 define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     72 ;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     73         %negA = fsub <2 x float> <float -0.0, float -0.0>, %A
     74         %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C)
     75 	ret <2 x float> %val
     76 }
     77 
     78 define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     79 ;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     80         %negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
     81         %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C)
     82 	ret <4 x float> %val
     83 }
     84 
     85 define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     86 ;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     87         %negA = fsub <2 x double> <double -0.0, double -0.0>, %A
     88         %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C)
     89 	ret <2 x double> %val
     90 }
     91 
     92 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
     93 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
     94 declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
     95 
     96 define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     97 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     98         %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
     99 	ret <2 x float> %val
    100 }
    101 
    102 define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
    103 ;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    104         %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
    105 	ret <4 x float> %val
    106 }
    107 
    108 define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
    109 ;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    110         %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
    111 	ret <2 x double> %val
    112 }
    113 
    114 
; Another set of tests, checking that a multiply with more than one use is not fused.
    116 
    117 define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
    118 ;CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    119   %tmp1 = fmul <2 x float> %A, %B;
    120   %tmp2 = fadd <2 x float> %C, %tmp1;
    121   %tmp3 = fadd <2 x float> %tmp2, %tmp1;
    122   ret <2 x float> %tmp3
    123 }
    124 
    125 define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
    126 ;CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    127         %tmp1 = fmul <2 x double> %A, %B;
    128         %tmp2 = fsub <2 x double> %C, %tmp1;
    129         %tmp3 = fsub <2 x double> %tmp2, %tmp1;
    130         ret <2 x double> %tmp3
    131 }
    132 
    133