Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
      2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
      3 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
      4 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
      5 
      6 ; The following tests use the balance-fp-ops feature, and should be independent of
      7 ; the target cpu.
      8 
      9 ; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
     10 ; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops  -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP
     11 
     12 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
     13 ; our test strategy is to:
     14 ;   * Force the pass to always perform register swapping even if the dest register is of the
     15 ;     correct color already (-force-all)
     16 ;   * Force the pass to ignore all hints it obtained from regalloc (-override=N),
     17 ;     and run it twice, once where it always hints even (=1), and once where it always hints odd (=2).
     18 ;
     19 ; We then use regex magic to check that in the two cases the register allocation is
     20 ; different; this is what gives us the testing coverage and distinguishes cases where
     21 ; the pass has done some work versus accidental regalloc.
     22 
     23 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
     24 target triple = "aarch64"
     25 
     26 ; Non-overlapping groups - shouldn't need any changing at all.
     27 
     28 ; CHECK-LABEL: f1:
     29 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
     30 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
     31 ; CHECK: fmadd [[x]]
     32 ; CHECK: fmsub [[x]]
     33 ; CHECK: fmadd [[x]]
     34 ; CHECK: str [[x]]
     35 
     36 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
     37 entry:                                            ; Two accumulation chains evaluated back to back: chain 1 is stored to q[0] before chain 2 (q[1]) begins.
     38   %0 = load double, double* %p, align 8           ; p[0]
     39   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
     40   %1 = load double, double* %arrayidx1, align 8   ; p[1]
     41   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
     42   %2 = load double, double* %arrayidx2, align 8   ; p[2]
     43   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
     44   %3 = load double, double* %arrayidx3, align 8   ; p[3]
     45   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
     46   %4 = load double, double* %arrayidx4, align 8   ; p[4]
     47   %mul = fmul fast double %0, %1                  ; chain 1 seed: p[0]*p[1]
     48   %add = fadd fast double %mul, %4                ; chain 1: + p[4]
     49   %mul5 = fmul fast double %1, %2
     50   %add6 = fadd fast double %mul5, %add            ; chain 1: + p[1]*p[2]
     51   %mul7 = fmul fast double %1, %3
     52   %sub = fsub fast double %add6, %mul7            ; chain 1: - p[1]*p[3]
     53   %mul8 = fmul fast double %2, %3
     54   %add9 = fadd fast double %mul8, %sub            ; chain 1: + p[2]*p[3]
     55   store double %add9, double* %q, align 8         ; q[0] = chain 1 result
     56   %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
     57   %5 = load double, double* %arrayidx11, align 8  ; p[5]
     58   %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
     59   %6 = load double, double* %arrayidx12, align 8  ; p[6]
     60   %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
     61   %7 = load double, double* %arrayidx13, align 8  ; p[7]
     62   %mul15 = fmul fast double %6, %7                ; p[6]*p[7]
     63   %mul16 = fmul fast double %0, %5                ; p[0]*p[5]
     64   %add17 = fadd fast double %mul16, %mul15        ; chain 2: p[0]*p[5] + p[6]*p[7]
     65   %mul18 = fmul fast double %5, %6
     66   %add19 = fadd fast double %mul18, %add17        ; chain 2: + p[5]*p[6]
     67   %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
     68   store double %add19, double* %arrayidx20, align 8 ; q[1] = chain 2 result
     69   ret void
     70 }
     71 
     72 ; Overlapping groups - coloring needed.
     73 
     74 ; CHECK-LABEL: f2:
     75 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
     76 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
     77 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
     78 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
     79 ; CHECK: fmadd [[x]]
     80 ; CHECK: fmadd [[y]]
     81 ; CHECK: fmsub [[x]]
     82 ; CHECK: fmadd [[y]]
     83 ; CHECK: fmadd [[x]]
     84 ; CHECK-BALFP: stp [[x]], [[y]]
     85 ; CHECK-A53-DAG: str [[x]]
     86 ; CHECK-A53-DAG: str [[y]]
     87 
     88 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
     89 entry:                                            ; Two accumulation chains whose operations alternate in program order: chain A ends in %add17 (q[0]), chain B in %add15 (q[1]).
     90   %0 = load double, double* %p, align 8           ; p[0]
     91   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
     92   %1 = load double, double* %arrayidx1, align 8   ; p[1]
     93   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
     94   %2 = load double, double* %arrayidx2, align 8   ; p[2]
     95   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
     96   %3 = load double, double* %arrayidx3, align 8   ; p[3]
     97   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
     98   %4 = load double, double* %arrayidx4, align 8   ; p[4]
     99   %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
    100   %5 = load double, double* %arrayidx5, align 8   ; p[5]
    101   %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
    102   %6 = load double, double* %arrayidx6, align 8   ; p[6]
    103   %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
    104   %7 = load double, double* %arrayidx7, align 8   ; p[7]
    105   %mul = fmul fast double %0, %1                  ; chain A seed: p[0]*p[1]
    106   %add = fadd fast double %mul, %7                ; A: + p[7]
    107   %mul8 = fmul fast double %5, %6                 ; chain B seed: p[5]*p[6]
    108   %mul9 = fmul fast double %1, %2
    109   %add10 = fadd fast double %mul9, %add           ; A: + p[1]*p[2]
    110   %mul11 = fmul fast double %3, %4
    111   %add12 = fadd fast double %mul11, %mul8         ; B: + p[3]*p[4]
    112   %mul13 = fmul fast double %1, %3
    113   %sub = fsub fast double %add10, %mul13          ; A: - p[1]*p[3]
    114   %mul14 = fmul fast double %4, %5
    115   %add15 = fadd fast double %mul14, %add12        ; B: + p[4]*p[5]
    116   %mul16 = fmul fast double %2, %3
    117   %add17 = fadd fast double %mul16, %sub          ; A: + p[2]*p[3]
    118   store double %add17, double* %q, align 8        ; q[0] = chain A result
    119   %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
    120   store double %add15, double* %arrayidx19, align 8 ; q[1] = chain B result
    121   ret void
    122 }
    123 
    124 ; Dest register is live on block exit - fixup needed.
    125 
    126 ; CHECK-LABEL: f3:
    127 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
    128 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
    129 ; CHECK: fmadd [[x]]
    130 ; CHECK: fmsub [[x]]
    131 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
    132 ; CHECK: str [[y]]
    133 
    134 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
    135 entry:                                            ; One accumulation chain whose final value (%add9) is consumed only in if.end, after a conditional call to @g.
    136   %0 = load double, double* %p, align 8           ; p[0]
    137   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
    138   %1 = load double, double* %arrayidx1, align 8   ; p[1]
    139   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
    140   %2 = load double, double* %arrayidx2, align 8   ; p[2]
    141   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
    142   %3 = load double, double* %arrayidx3, align 8   ; p[3]
    143   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
    144   %4 = load double, double* %arrayidx4, align 8   ; p[4]
    145   %mul = fmul fast double %0, %1                  ; chain seed: p[0]*p[1]
    146   %add = fadd fast double %mul, %4                ; + p[4]
    147   %mul5 = fmul fast double %1, %2
    148   %add6 = fadd fast double %mul5, %add            ; + p[1]*p[2]
    149   %mul7 = fmul fast double %1, %3
    150   %sub = fsub fast double %add6, %mul7            ; - p[1]*p[3]
    151   %mul8 = fmul fast double %2, %3
    152   %add9 = fadd fast double %mul8, %sub            ; + p[2]*p[3]; used only by the store in if.end
    153   %cmp = fcmp oeq double %3, 0.000000e+00         ; p[3] == 0.0
    154   br i1 %cmp, label %if.then, label %if.end
    155 
    156 if.then:                                          ; preds = %entry
    157   tail call void bitcast (void (...)* @g to void ()*)() #2 ; call executed only when p[3] == 0.0
    158   br label %if.end
    159 
    160 if.end:                                           ; preds = %if.then, %entry
    161   store double %add9, double* %q, align 8         ; q[0] = chain result computed in entry
    162   ret void
    163 }
    164 
    165 declare void @g(...) #1 ; External callee, no definition in this file; called (via a bitcast to void ()) from the if.then blocks of f3 and f5.
    166 
    167 ; Single precision version of f2.
    168 
    169 ; CHECK-LABEL: f4:
    170 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
    171 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
    172 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
    173 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
    174 ; CHECK: fmadd [[x]]
    175 ; CHECK: fmadd [[y]]
    176 ; CHECK: fmsub [[x]]
    177 ; CHECK: fmadd [[y]]
    178 ; CHECK: fmadd [[x]]
    179 ; CHECK-BALFP: stp [[x]], [[y]]
    180 ; CHECK-A53-DAG: str [[x]]
    181 ; CHECK-A53-DAG: str [[y]]
    182 
    183 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
    184 entry:                                            ; Same operation sequence as f2, on float: chain A ends in %add17 (q[0]), chain B in %add15 (q[1]).
    185   %0 = load float, float* %p, align 4             ; p[0]
    186   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
    187   %1 = load float, float* %arrayidx1, align 4     ; p[1]
    188   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
    189   %2 = load float, float* %arrayidx2, align 4     ; p[2]
    190   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
    191   %3 = load float, float* %arrayidx3, align 4     ; p[3]
    192   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
    193   %4 = load float, float* %arrayidx4, align 4     ; p[4]
    194   %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
    195   %5 = load float, float* %arrayidx5, align 4     ; p[5]
    196   %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
    197   %6 = load float, float* %arrayidx6, align 4     ; p[6]
    198   %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
    199   %7 = load float, float* %arrayidx7, align 4     ; p[7]
    200   %mul = fmul fast float %0, %1                   ; chain A seed: p[0]*p[1]
    201   %add = fadd fast float %mul, %7                 ; A: + p[7]
    202   %mul8 = fmul fast float %5, %6                  ; chain B seed: p[5]*p[6]
    203   %mul9 = fmul fast float %1, %2
    204   %add10 = fadd fast float %mul9, %add            ; A: + p[1]*p[2]
    205   %mul11 = fmul fast float %3, %4
    206   %add12 = fadd fast float %mul11, %mul8          ; B: + p[3]*p[4]
    207   %mul13 = fmul fast float %1, %3
    208   %sub = fsub fast float %add10, %mul13           ; A: - p[1]*p[3]
    209   %mul14 = fmul fast float %4, %5
    210   %add15 = fadd fast float %mul14, %add12         ; B: + p[4]*p[5]
    211   %mul16 = fmul fast float %2, %3
    212   %add17 = fadd fast float %mul16, %sub           ; A: + p[2]*p[3]
    213   store float %add17, float* %q, align 4          ; q[0] = chain A result
    214   %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
    215   store float %add15, float* %arrayidx19, align 4 ; q[1] = chain B result
    216   ret void
    217 }
    218 
    219 ; Single precision version of f3
    220 
    221 ; CHECK-LABEL: f5:
    222 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
    223 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
    224 ; CHECK: fmadd [[x]]
    225 ; CHECK: fmsub [[x]]
    226 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
    227 ; CHECK: str [[y]]
    228 
    229 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
    230 entry:                                            ; Same operation sequence as f3, on float: %add9 is consumed only in if.end, after a conditional call to @g.
    231   %0 = load float, float* %p, align 4             ; p[0]
    232   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
    233   %1 = load float, float* %arrayidx1, align 4     ; p[1]
    234   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
    235   %2 = load float, float* %arrayidx2, align 4     ; p[2]
    236   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
    237   %3 = load float, float* %arrayidx3, align 4     ; p[3]
    238   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
    239   %4 = load float, float* %arrayidx4, align 4     ; p[4]
    240   %mul = fmul fast float %0, %1                   ; chain seed: p[0]*p[1]
    241   %add = fadd fast float %mul, %4                 ; + p[4]
    242   %mul5 = fmul fast float %1, %2
    243   %add6 = fadd fast float %mul5, %add             ; + p[1]*p[2]
    244   %mul7 = fmul fast float %1, %3
    245   %sub = fsub fast float %add6, %mul7             ; - p[1]*p[3]
    246   %mul8 = fmul fast float %2, %3
    247   %add9 = fadd fast float %mul8, %sub             ; + p[2]*p[3]; used only by the store in if.end
    248   %cmp = fcmp oeq float %3, 0.000000e+00          ; p[3] == 0.0
    249   br i1 %cmp, label %if.then, label %if.end
    250 
    251 if.then:                                          ; preds = %entry
    252   tail call void bitcast (void (...)* @g to void ()*)() #2 ; call executed only when p[3] == 0.0
    253   br label %if.end
    254 
    255 if.end:                                           ; preds = %if.then, %entry
    256   store float %add9, float* %q, align 4           ; q[0] = chain result computed in entry
    257   ret void
    258 }
    259 
    260 ; Test that regmask clobbering stops a chain sequence.
    261 
    262 ; CHECK-LABEL: f6:
    263 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
    264 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
    265 ; CHECK: fmadd [[x]]
    266 ; CHECK: fmsub [[x]]
    267 ; CHECK: fmadd d0, {{.*}}, [[x]]
    268 ; CHECK: bl hh
    269 ; CHECK: str d0
    270 
    271 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
    272 entry:                                            ; One accumulation chain whose result is passed to @hh; the value @hh returns is what gets stored to q[0].
    273   %0 = load double, double* %p, align 8           ; p[0]
    274   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
    275   %1 = load double, double* %arrayidx1, align 8   ; p[1]
    276   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
    277   %2 = load double, double* %arrayidx2, align 8   ; p[2]
    278   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
    279   %3 = load double, double* %arrayidx3, align 8   ; p[3]
    280   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
    281   %4 = load double, double* %arrayidx4, align 8   ; p[4]
    282   %mul = fmul fast double %0, %1                  ; chain seed: p[0]*p[1]
    283   %add = fadd fast double %mul, %4                ; + p[4]
    284   %mul5 = fmul fast double %1, %2
    285   %add6 = fadd fast double %mul5, %add            ; + p[1]*p[2]
    286   %mul7 = fmul fast double %1, %3
    287   %sub = fsub fast double %add6, %mul7            ; - p[1]*p[3]
    288   %mul8 = fmul fast double %2, %3
    289   %add9 = fadd fast double %mul8, %sub            ; + p[2]*p[3]; becomes the call argument below
    290   %call = tail call double @hh(double %add9) #2   ; chain result is the sole argument to @hh
    291   store double %call, double* %q, align 8         ; q[0] = value returned by @hh
    292   ret void
    293 }
    294 
    295 declare double @hh(double) #1 ; External callee, no definition in this file; f6 passes it the chain result and stores its return value.
    296 
    297 ; Check that we correctly deal with repeated operands.
    298 ; The following testcase creates:
    299 ;   %d1 = FADDDrr killed %d0, %d0
    300 ; We'll get a crash if we naively look at the first operand, remove it
    301 ; from the substitution list then look at the second operand.
    302 ; CHECK-LABEL: f7:
    303 ; CHECK: fmadd [[x:d[0-9]+]]
    304 ; CHECK: fadd d1, [[x]], [[x]]
    305 
    306 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
    307 entry:                                            ; One accumulation chain whose result is added to itself (same value as both fadd operands) and passed as the second argument to @hhh.
    308   %0 = load double, double* %p, align 8           ; p[0]
    309   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
    310   %1 = load double, double* %arrayidx1, align 8   ; p[1]
    311   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
    312   %2 = load double, double* %arrayidx2, align 8   ; p[2]
    313   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
    314   %3 = load double, double* %arrayidx3, align 8   ; p[3]
    315   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
    316   %4 = load double, double* %arrayidx4, align 8   ; p[4]
    317   %mul = fmul fast double %0, %1                  ; chain seed: p[0]*p[1]
    318   %add = fadd fast double %mul, %4                ; + p[4]
    319   %mul5 = fmul fast double %1, %2
    320   %add6 = fadd fast double %mul5, %add            ; + p[1]*p[2]
    321   %mul7 = fmul fast double %1, %3
    322   %sub = fsub fast double %add6, %mul7            ; - p[1]*p[3]
    323   %mul8 = fmul fast double %2, %3
    324   %add9 = fadd fast double %mul8, %sub            ; + p[2]*p[3]
    325   %add10 = fadd fast double %add9, %add9          ; repeated operand: the chain result appears twice in one instruction
    326   call void @hhh(double 0.0, double %add10)       ; first argument is the constant 0.0; %add10 is the second argument
    327   ret void
    328 }
    329 
    330 declare void @hhh(double, double) ; External callee, no definition in this file and no attribute group; f7 calls it with (0.0, %add10).
    331 
    332 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } ; #0: attached to every function definition in this file (f1-f7)
    333 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } ; #1: attached to the @g and @hh declarations; same as #0 without nounwind
    334 attributes #2 = { nounwind } ; #2: attached to the call sites of @g and @hh
    335 
    336