; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD

; t1 - single-precision acc + (a * b).
; The +vfp2 and +neon runs expect a single vmla.f32; the cortex-a8 run expects
; a separate vmul.f32 followed by a vadd.f32.
define float @t1(float %acc, float %a, float %b) {
entry:
; VFP2: t1:
; VFP2: vmla.f32

; NEON: t1:
; NEON: vmla.f32

; A8: t1:
; A8: vmul.f32
; A8: vadd.f32
  %prod = fmul float %a, %b
  %sum = fadd float %acc, %prod
  ret float %sum
}

; t2 - double-precision acc + (a * b).
; Same expectations as t1, with the .f64 forms of the instructions.
define double @t2(double %acc, double %a, double %b) {
entry:
; VFP2: t2:
; VFP2: vmla.f64

; NEON: t2:
; NEON: vmla.f64

; A8: t2:
; A8: vmul.f64
; A8: vadd.f64
  %prod = fmul double %a, %b
  %sum = fadd double %acc, %prod
  ret double %sum
}

; t3 - single-precision (a * b) + acc, i.e. t1 with the fadd operands swapped.
; The expected instructions are the same as for t1.
define float @t3(float %acc, float %a, float %b) {
entry:
; VFP2: t3:
; VFP2: vmla.f32

; NEON: t3:
; NEON: vmla.f32

; A8: t3:
; A8: vmul.f32
; A8: vadd.f32
  %prod = fmul float %a, %b
  %sum = fadd float %prod, %acc
  ret float %sum
}

; It's possible to make use of fp vmla / vmls on Cortex-A9.
; rdar://8659675
; t4 - two independent multiply-accumulates that share the multiplicand %a;
; each result is stored through its own pointer.
define void @t4(float %acc1, float %a, float %b, float %acc2, float %c, float* %P1, float* %P2) {
entry:
; A8: t4:
; A8: vmul.f32
; A8: vmul.f32
; A8: vadd.f32
; A8: vadd.f32

; Two vmla with no RAW hazard
; A9: t4:
; A9: vmla.f32
; A9: vmla.f32

; HARD: t4:
; HARD: vmla.f32 s0, s1, s2
; HARD: vmla.f32 s3, s1, s4
  %prod1 = fmul float %a, %b
  %sum1 = fadd float %acc1, %prod1
  %prod2 = fmul float %a, %c
  %sum2 = fadd float %acc2, %prod2
  store float %sum1, float* %P1
  store float %sum2, float* %P2
  ret void
}

; t5 - (e + a * b) + (c * d). The second fadd consumes the result of the first
; multiply-accumulate. The cortex-a9 runs expect only the first multiply-add to
; become a vmla.f32, with the second kept as vmul.f32 + vadd.f32.
; NOTE(review): presumably this avoids a RAW hazard on the first vmla result -
; confirm against the Cortex-A9 scheduling rationale.
define float @t5(float %a, float %b, float %c, float %d, float %e) {
entry:
; A8: t5:
; A8: vmul.f32
; A8: vmul.f32
; A8: vadd.f32
; A8: vadd.f32

; A9: t5:
; A9: vmla.f32
; A9: vmul.f32
; A9: vadd.f32

; HARD: t5:
; HARD: vmla.f32 s4, s0, s1
; HARD: vmul.f32 s0, s2, s3
; HARD: vadd.f32 s0, s4, s0
  %ab = fmul float %a, %b
  %acc = fadd float %e, %ab
  %cd = fmul float %c, %d
  %res = fadd float %acc, %cd
  ret float %res
}