; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -enable-misched -pre-RA-sched=source -scheditins=false | FileCheck %s
;
; Test MI-Sched support for latency-based stalls on an in-order pipeline
; using the new machine model.

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"

; Don't be too strict with the top of the schedule, but most of it
; should be nicely pipelined.
;
; CHECK: saxpy10:
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmul
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmov
; CHECK-NEXT: bx
;
; This accumulates a sum rather than storing each result.
64 define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) { 65 entry: 66 %0 = load float, float* %data1, align 4 67 %mul = fmul float %0, %a 68 %1 = load float, float* %data2, align 4 69 %add = fadd float %mul, %1 70 %add2 = fadd float %add, 0.000000e+00 71 %arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1 72 %2 = load float, float* %arrayidx.1, align 4 73 %mul.1 = fmul float %2, %a 74 %arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1 75 %3 = load float, float* %arrayidx1.1, align 4 76 %add.1 = fadd float %mul.1, %3 77 %add2.1 = fadd float %add2, %add.1 78 %arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2 79 %4 = load float, float* %arrayidx.2, align 4 80 %mul.2 = fmul float %4, %a 81 %arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2 82 %5 = load float, float* %arrayidx1.2, align 4 83 %add.2 = fadd float %mul.2, %5 84 %add2.2 = fadd float %add2.1, %add.2 85 %arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3 86 %6 = load float, float* %arrayidx.3, align 4 87 %mul.3 = fmul float %6, %a 88 %arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3 89 %7 = load float, float* %arrayidx1.3, align 4 90 %add.3 = fadd float %mul.3, %7 91 %add2.3 = fadd float %add2.2, %add.3 92 %arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4 93 %8 = load float, float* %arrayidx.4, align 4 94 %mul.4 = fmul float %8, %a 95 %arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4 96 %9 = load float, float* %arrayidx1.4, align 4 97 %add.4 = fadd float %mul.4, %9 98 %add2.4 = fadd float %add2.3, %add.4 99 %arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5 100 %10 = load float, float* %arrayidx.5, align 4 101 %mul.5 = fmul float %10, %a 102 %arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5 103 %11 = load float, float* %arrayidx1.5, align 4 104 %add.5 = fadd float %mul.5, %11 105 %add2.5 = fadd float %add2.4, %add.5 
106 %arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6 107 %12 = load float, float* %arrayidx.6, align 4 108 %mul.6 = fmul float %12, %a 109 %arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6 110 %13 = load float, float* %arrayidx1.6, align 4 111 %add.6 = fadd float %mul.6, %13 112 %add2.6 = fadd float %add2.5, %add.6 113 %arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7 114 %14 = load float, float* %arrayidx.7, align 4 115 %mul.7 = fmul float %14, %a 116 %arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7 117 %15 = load float, float* %arrayidx1.7, align 4 118 %add.7 = fadd float %mul.7, %15 119 %add2.7 = fadd float %add2.6, %add.7 120 %arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8 121 %16 = load float, float* %arrayidx.8, align 4 122 %mul.8 = fmul float %16, %a 123 %arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8 124 %17 = load float, float* %arrayidx1.8, align 4 125 %add.8 = fadd float %mul.8, %17 126 %add2.8 = fadd float %add2.7, %add.8 127 %arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9 128 %18 = load float, float* %arrayidx.9, align 4 129 %mul.9 = fmul float %18, %a 130 %arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9 131 %19 = load float, float* %arrayidx1.9, align 4 132 %add.9 = fadd float %mul.9, %19 133 %add2.9 = fadd float %add2.8, %add.9 134 ret float %add2.9 135 } 136