; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
;
; Test PBQP is able to fulfill the accumulator chaining constraint.
;
; The function below builds two independent eight-tap multiply-accumulate
; chains that share the coefficient loads: one over %x and one over %y.
; The same llc command is run twice. The first FileCheck pass looks for seven
; fmadd instructions whose destination and accumulator d-registers both end
; in an even digit; the second pass looks for seven fmadd instructions whose
; destination and accumulator d-registers both end in an odd digit.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; CHECK-LABEL: fir
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}

; fir: fully unrolled eight-tap dot products.
;   *rx = sum over k in 0..7 of x[k] * c[k]
;   *ry = sum over k in 0..7 of y[k] * c[k]
; Each coefficient c[k] is loaded once and feeds both chains. The instruction
; order is intentionally left exactly as generated, since scheduling and
; register allocation results depend on it.
define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
entry:
  ; Tap 0: plain products that seed the two accumulator chains.
  %c.0 = load double, double* %c, align 8
  %x.0 = load double, double* %x, align 8
  %xmul.0 = fmul fast double %x.0, %c.0
  %y.0 = load double, double* %y, align 8
  %ymul.0 = fmul fast double %y.0, %c.0

  ; Tap 1.
  %c.addr.1 = getelementptr inbounds double, double* %c, i64 1
  %c.1 = load double, double* %c.addr.1, align 8
  %x.addr.1 = getelementptr inbounds double, double* %x, i64 1
  %x.1 = load double, double* %x.addr.1, align 8
  %xmul.1 = fmul fast double %x.1, %c.1
  %xacc.1 = fadd fast double %xmul.1, %xmul.0
  %y.addr.1 = getelementptr inbounds double, double* %y, i64 1
  %y.1 = load double, double* %y.addr.1, align 8
  %ymul.1 = fmul fast double %y.1, %c.1
  %yacc.1 = fadd fast double %ymul.1, %ymul.0

  ; Tap 2.
  %c.addr.2 = getelementptr inbounds double, double* %c, i64 2
  %c.2 = load double, double* %c.addr.2, align 8
  %x.addr.2 = getelementptr inbounds double, double* %x, i64 2
  %x.2 = load double, double* %x.addr.2, align 8
  %xmul.2 = fmul fast double %x.2, %c.2
  %xacc.2 = fadd fast double %xmul.2, %xacc.1
  %y.addr.2 = getelementptr inbounds double, double* %y, i64 2
  %y.2 = load double, double* %y.addr.2, align 8
  %ymul.2 = fmul fast double %y.2, %c.2
  %yacc.2 = fadd fast double %ymul.2, %yacc.1

  ; Tap 3.
  %c.addr.3 = getelementptr inbounds double, double* %c, i64 3
  %c.3 = load double, double* %c.addr.3, align 8
  %x.addr.3 = getelementptr inbounds double, double* %x, i64 3
  %x.3 = load double, double* %x.addr.3, align 8
  %xmul.3 = fmul fast double %x.3, %c.3
  %xacc.3 = fadd fast double %xmul.3, %xacc.2
  %y.addr.3 = getelementptr inbounds double, double* %y, i64 3
  %y.3 = load double, double* %y.addr.3, align 8
  %ymul.3 = fmul fast double %y.3, %c.3
  %yacc.3 = fadd fast double %ymul.3, %yacc.2

  ; Tap 4.
  %c.addr.4 = getelementptr inbounds double, double* %c, i64 4
  %c.4 = load double, double* %c.addr.4, align 8
  %x.addr.4 = getelementptr inbounds double, double* %x, i64 4
  %x.4 = load double, double* %x.addr.4, align 8
  %xmul.4 = fmul fast double %x.4, %c.4
  %xacc.4 = fadd fast double %xmul.4, %xacc.3
  %y.addr.4 = getelementptr inbounds double, double* %y, i64 4
  %y.4 = load double, double* %y.addr.4, align 8
  %ymul.4 = fmul fast double %y.4, %c.4
  %yacc.4 = fadd fast double %ymul.4, %yacc.3

  ; Tap 5.
  %c.addr.5 = getelementptr inbounds double, double* %c, i64 5
  %c.5 = load double, double* %c.addr.5, align 8
  %x.addr.5 = getelementptr inbounds double, double* %x, i64 5
  %x.5 = load double, double* %x.addr.5, align 8
  %xmul.5 = fmul fast double %x.5, %c.5
  %xacc.5 = fadd fast double %xmul.5, %xacc.4
  %y.addr.5 = getelementptr inbounds double, double* %y, i64 5
  %y.5 = load double, double* %y.addr.5, align 8
  %ymul.5 = fmul fast double %y.5, %c.5
  %yacc.5 = fadd fast double %ymul.5, %yacc.4

  ; Tap 6.
  %c.addr.6 = getelementptr inbounds double, double* %c, i64 6
  %c.6 = load double, double* %c.addr.6, align 8
  %x.addr.6 = getelementptr inbounds double, double* %x, i64 6
  %x.6 = load double, double* %x.addr.6, align 8
  %xmul.6 = fmul fast double %x.6, %c.6
  %xacc.6 = fadd fast double %xmul.6, %xacc.5
  %y.addr.6 = getelementptr inbounds double, double* %y, i64 6
  %y.6 = load double, double* %y.addr.6, align 8
  %ymul.6 = fmul fast double %y.6, %c.6
  %yacc.6 = fadd fast double %ymul.6, %yacc.5

  ; Tap 7.
  %c.addr.7 = getelementptr inbounds double, double* %c, i64 7
  %c.7 = load double, double* %c.addr.7, align 8
  %x.addr.7 = getelementptr inbounds double, double* %x, i64 7
  %x.7 = load double, double* %x.addr.7, align 8
  %xmul.7 = fmul fast double %x.7, %c.7
  %xacc.7 = fadd fast double %xmul.7, %xacc.6
  %y.addr.7 = getelementptr inbounds double, double* %y, i64 7
  %y.7 = load double, double* %y.addr.7, align 8
  %ymul.7 = fmul fast double %y.7, %c.7
  %yacc.7 = fadd fast double %ymul.7, %yacc.6

  ; Write back the two chain results.
  store double %xacc.7, double* %rx, align 8
  store double %yacc.7, double* %ry, align 8
  ret void
}