Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -enable-misched -pre-RA-sched=source -scheditins=false | FileCheck %s
      2 ;
      3 ; Test MI-Sched support for latency-based stalls on an in-order pipeline
      4 ; using the new machine model.
      5 
      6 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
      7 
      8 ; Don't be too strict with the top of the schedule, but most of it
      9 ; should be nicely pipelined.
     10 ;
     11 ; CHECK: saxpy10:
     12 ; CHECK: vldr
     13 ; CHECK: vldr
     14 ; CHECK: vldr
     15 ; CHECK: vldr
     16 ; CHECK: vldr
     17 ; CHECK-NEXT: vldr
     18 ; CHECK-NEXT: vmul
     19 ; CHECK-NEXT: vadd
     20 ; CHECK-NEXT: vadd
     21 ; CHECK-NEXT: vldr
     22 ; CHECK-NEXT: vldr
     23 ; CHECK-NEXT: vadd
     24 ; CHECK-NEXT: vadd
     25 ; CHECK-NEXT: vmul
     26 ; CHECK-NEXT: vldr
     27 ; CHECK-NEXT: vadd
     28 ; CHECK-NEXT: vadd
     29 ; CHECK-NEXT: vldr
     30 ; CHECK-NEXT: vmul
     31 ; CHECK-NEXT: vldr
     32 ; CHECK-NEXT: vadd
     33 ; CHECK-NEXT: vldr
     34 ; CHECK-NEXT: vadd
     35 ; CHECK-NEXT: vldr
     36 ; CHECK-NEXT: vmul
     37 ; CHECK-NEXT: vadd
     38 ; CHECK-NEXT: vldr
     39 ; CHECK-NEXT: vadd
     40 ; CHECK-NEXT: vldr
     41 ; CHECK-NEXT: vmul
     42 ; CHECK-NEXT: vadd
     43 ; CHECK-NEXT: vldr
     44 ; CHECK-NEXT: vadd
     45 ; CHECK-NEXT: vldr
     46 ; CHECK-NEXT: vmul
     47 ; CHECK-NEXT: vadd
     48 ; CHECK-NEXT: vldr
     49 ; CHECK-NEXT: vadd
     50 ; CHECK-NEXT: vldr
     51 ; CHECK-NEXT: vmul
     52 ; CHECK-NEXT: vadd
     53 ; CHECK-NEXT: vldr
     54 ; CHECK-NEXT: vmul
     55 ; CHECK-NEXT: vadd
     56 ; CHECK-NEXT: vldr
     57 ; CHECK-NEXT: vadd
     58 ; CHECK-NEXT: vadd
     59 ; CHECK-NEXT: vadd
     60 ; CHECK-NEXT: vmov
     61 ; CHECK-NEXT: bx
     62 ;
     63 ; This accumulates a sum rather than storing each result.
; Fully unrolled 10-element saxpy reduction:
;   sum = 0; for (i = 0; i < 10; ++i) sum += data1[i] * a + data2[i];
; Each iteration is two loads, one fmul, and two fadds; the serial
; %add2.N accumulator chain is what the scheduler must pipeline around.
define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
entry:
  ; i = 0: seed the accumulator (fadd with +0.0 keeps the op count uniform)
  %0 = load float, float* %data1, align 4
  %mul = fmul float %0, %a
  %1 = load float, float* %data2, align 4
  %add = fadd float %mul, %1
  %add2 = fadd float %add, 0.000000e+00
  ; i = 1
  %arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1
  %2 = load float, float* %arrayidx.1, align 4
  %mul.1 = fmul float %2, %a
  %arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1
  %3 = load float, float* %arrayidx1.1, align 4
  %add.1 = fadd float %mul.1, %3
  %add2.1 = fadd float %add2, %add.1
  ; i = 2
  %arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2
  %4 = load float, float* %arrayidx.2, align 4
  %mul.2 = fmul float %4, %a
  %arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2
  %5 = load float, float* %arrayidx1.2, align 4
  %add.2 = fadd float %mul.2, %5
  %add2.2 = fadd float %add2.1, %add.2
  ; i = 3
  %arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3
  %6 = load float, float* %arrayidx.3, align 4
  %mul.3 = fmul float %6, %a
  %arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3
  %7 = load float, float* %arrayidx1.3, align 4
  %add.3 = fadd float %mul.3, %7
  %add2.3 = fadd float %add2.2, %add.3
  ; i = 4
  %arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4
  %8 = load float, float* %arrayidx.4, align 4
  %mul.4 = fmul float %8, %a
  %arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4
  %9 = load float, float* %arrayidx1.4, align 4
  %add.4 = fadd float %mul.4, %9
  %add2.4 = fadd float %add2.3, %add.4
  ; i = 5
  %arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5
  %10 = load float, float* %arrayidx.5, align 4
  %mul.5 = fmul float %10, %a
  %arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5
  %11 = load float, float* %arrayidx1.5, align 4
  %add.5 = fadd float %mul.5, %11
  %add2.5 = fadd float %add2.4, %add.5
  ; i = 6
  %arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6
  %12 = load float, float* %arrayidx.6, align 4
  %mul.6 = fmul float %12, %a
  %arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6
  %13 = load float, float* %arrayidx1.6, align 4
  %add.6 = fadd float %mul.6, %13
  %add2.6 = fadd float %add2.5, %add.6
  ; i = 7
  %arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7
  %14 = load float, float* %arrayidx.7, align 4
  %mul.7 = fmul float %14, %a
  %arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7
  %15 = load float, float* %arrayidx1.7, align 4
  %add.7 = fadd float %mul.7, %15
  %add2.7 = fadd float %add2.6, %add.7
  ; i = 8
  %arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8
  %16 = load float, float* %arrayidx.8, align 4
  %mul.8 = fmul float %16, %a
  %arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8
  %17 = load float, float* %arrayidx1.8, align 4
  %add.8 = fadd float %mul.8, %17
  %add2.8 = fadd float %add2.7, %add.8
  ; i = 9
  %arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9
  %18 = load float, float* %arrayidx.9, align 4
  %mul.9 = fmul float %18, %a
  %arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9
  %19 = load float, float* %arrayidx1.9, align 4
  %add.9 = fadd float %mul.9, %19
  %add2.9 = fadd float %add2.8, %add.9
  ; NOTE(review): IR is intentionally kept in this exact statement order —
  ; the RUN line uses -pre-RA-sched=source and the CHECK-NEXT chain pins the
  ; resulting schedule, so reordering the IR would change the test.
  ret float %add2.9
}
    136