Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN
      2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD
      3 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
      4 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
      5 
      6 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
      7 ; our test strategy is to:
      8 ;   * Force the pass to always perform register swapping even if the dest register is of the
      9 ;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
     10 ;   * Override the hints the pass obtained from regalloc (-aarch64-a57-fp-load-balancing-override),
     11 ;     and run it twice, once where it always hints even (=1), and once where it always hints odd (=2).
     12 ;
     13 ; We then use regex magic to check that in the two cases the register allocation is
     14 ; different; this is what gives us the testing coverage and distinguishes cases where
     15 ; the pass has done some work versus accidental regalloc.
     16 
     17 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
     18 target triple = "aarch64"
     19 
     20 ; Non-overlapping groups - shouldn't need any changing at all.
     21 
     22 ; CHECK-LABEL: f1:
     23 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
     24 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
     25 ; CHECK: fmadd [[x]]
     26 ; CHECK: fmsub [[x]]
     27 ; CHECK: fmadd [[x]]
     28 ; CHECK: str [[x]]
     29 
     30 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 { ; two fmul/fadd chains; the second begins after the first has been stored
     31 entry:
     32   %0 = load double, double* %p, align 8 ; p[0]
     33   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
     34   %1 = load double, double* %arrayidx1, align 8 ; p[1]
     35   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
     36   %2 = load double, double* %arrayidx2, align 8 ; p[2]
     37   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
     38   %3 = load double, double* %arrayidx3, align 8 ; p[3]
     39   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
     40   %4 = load double, double* %arrayidx4, align 8 ; p[4]
     41   %mul = fmul fast double %0, %1 ; chain 1 begins: p[0]*p[1]
     42   %add = fadd fast double %mul, %4 ; ... + p[4]
     43   %mul5 = fmul fast double %1, %2
     44   %add6 = fadd fast double %mul5, %add ; ... + p[1]*p[2]
     45   %mul7 = fmul fast double %1, %3
     46   %sub = fsub fast double %add6, %mul7 ; ... - p[1]*p[3]
     47   %mul8 = fmul fast double %2, %3
     48   %add9 = fadd fast double %mul8, %sub ; ... + p[2]*p[3]; last value of chain 1
     49   store double %add9, double* %q, align 8 ; q[0] = chain 1 result
     50   %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
     51   %5 = load double, double* %arrayidx11, align 8 ; p[5]
     52   %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
     53   %6 = load double, double* %arrayidx12, align 8 ; p[6]
     54   %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
     55   %7 = load double, double* %arrayidx13, align 8 ; p[7]
     56   %mul15 = fmul fast double %6, %7 ; chain 2 begins: p[6]*p[7]
     57   %mul16 = fmul fast double %0, %5
     58   %add17 = fadd fast double %mul16, %mul15 ; p[0]*p[5] + p[6]*p[7]
     59   %mul18 = fmul fast double %5, %6
     60   %add19 = fadd fast double %mul18, %add17 ; ... + p[5]*p[6]; last value of chain 2
     61   %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
     62   store double %add19, double* %arrayidx20, align 8 ; q[1] = chain 2 result
     63   ret void
     64 }
     65 
     66 ; Overlapping groups - coloring needed.
     67 
     68 ; CHECK-LABEL: f2:
     69 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
     70 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
     71 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
     72 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
     73 ; CHECK: fmadd [[x]]
     74 ; CHECK: fmadd [[y]]
     75 ; CHECK: fmsub [[x]]
     76 ; CHECK: fmadd [[y]]
     77 ; CHECK: fmadd [[x]]
     78 ; CHECK-A57: stp [[x]], [[y]]
     79 ; CHECK-A53-DAG: str [[x]]
     80 ; CHECK-A53-DAG: str [[y]]
     81 
     82 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 { ; two fmul/fadd chains (A and B) whose steps are interleaved in IR order
     83 entry:
     84   %0 = load double, double* %p, align 8 ; p[0]
     85   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
     86   %1 = load double, double* %arrayidx1, align 8 ; p[1]
     87   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
     88   %2 = load double, double* %arrayidx2, align 8 ; p[2]
     89   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
     90   %3 = load double, double* %arrayidx3, align 8 ; p[3]
     91   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
     92   %4 = load double, double* %arrayidx4, align 8 ; p[4]
     93   %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
     94   %5 = load double, double* %arrayidx5, align 8 ; p[5]
     95   %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
     96   %6 = load double, double* %arrayidx6, align 8 ; p[6]
     97   %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
     98   %7 = load double, double* %arrayidx7, align 8 ; p[7]
     99   %mul = fmul fast double %0, %1 ; chain A begins: p[0]*p[1]
    100   %add = fadd fast double %mul, %7 ; A: ... + p[7]
    101   %mul8 = fmul fast double %5, %6 ; chain B begins: p[5]*p[6]
    102   %mul9 = fmul fast double %1, %2
    103   %add10 = fadd fast double %mul9, %add ; A: ... + p[1]*p[2]
    104   %mul11 = fmul fast double %3, %4
    105   %add12 = fadd fast double %mul11, %mul8 ; B: ... + p[3]*p[4]
    106   %mul13 = fmul fast double %1, %3
    107   %sub = fsub fast double %add10, %mul13 ; A: ... - p[1]*p[3]
    108   %mul14 = fmul fast double %4, %5
    109   %add15 = fadd fast double %mul14, %add12 ; B: ... + p[4]*p[5]; last value of chain B
    110   %mul16 = fmul fast double %2, %3
    111   %add17 = fadd fast double %mul16, %sub ; A: ... + p[2]*p[3]; last value of chain A
    112   store double %add17, double* %q, align 8 ; q[0] = chain A result
    113   %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
    114   store double %add15, double* %arrayidx19, align 8 ; q[1] = chain B result
    115   ret void
    116 }
    117 
    118 ; Dest register is live on block exit - fixup needed.
    119 
    120 ; CHECK-LABEL: f3:
    121 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
    122 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
    123 ; CHECK: fmadd [[x]]
    124 ; CHECK: fmsub [[x]]
    125 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
    126 ; CHECK: str [[y]]
    127 
    128 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 { ; one fmul/fadd chain whose result is stored in a later block
    129 entry:
    130   %0 = load double, double* %p, align 8 ; p[0]
    131   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
    132   %1 = load double, double* %arrayidx1, align 8 ; p[1]
    133   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
    134   %2 = load double, double* %arrayidx2, align 8 ; p[2]
    135   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
    136   %3 = load double, double* %arrayidx3, align 8 ; p[3]
    137   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
    138   %4 = load double, double* %arrayidx4, align 8 ; p[4]
    139   %mul = fmul fast double %0, %1 ; chain begins: p[0]*p[1]
    140   %add = fadd fast double %mul, %4 ; ... + p[4]
    141   %mul5 = fmul fast double %1, %2
    142   %add6 = fadd fast double %mul5, %add ; ... + p[1]*p[2]
    143   %mul7 = fmul fast double %1, %3
    144   %sub = fsub fast double %add6, %mul7 ; ... - p[1]*p[3]
    145   %mul8 = fmul fast double %2, %3
    146   %add9 = fadd fast double %mul8, %sub ; ... + p[2]*p[3]; last value of the chain, used in if.end
    147   %cmp = fcmp oeq double %3, 0.000000e+00 ; p[3] == 0.0 selects the path that calls @g
    148   br i1 %cmp, label %if.then, label %if.end
    149 
    150 if.then:                                          ; preds = %entry
    151   tail call void bitcast (void (...)* @g to void ()*)() #2 ; call to the external @g while %add9 is still needed
    152   br label %if.end
    153 
    154 if.end:                                           ; preds = %if.then, %entry
    155   store double %add9, double* %q, align 8 ; q[0] = chain result computed in entry
    156   ret void
    157 }
    158 
    159 declare void @g(...) #1
    160 
    161 ; Single precision version of f2.
    162 
    163 ; CHECK-LABEL: f4:
    164 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
    165 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
    166 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
    167 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
    168 ; CHECK: fmadd [[x]]
    169 ; CHECK: fmadd [[y]]
    170 ; CHECK: fmsub [[x]]
    171 ; CHECK: fmadd [[y]]
    172 ; CHECK: fmadd [[x]]
    173 ; CHECK-A57: stp [[x]], [[y]]
    174 ; CHECK-A53-DAG: str [[x]]
    175 ; CHECK-A53-DAG: str [[y]]
    176 
    177 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 { ; float variant: two fmul/fadd chains (A and B) interleaved in IR order
    178 entry:
    179   %0 = load float, float* %p, align 4 ; p[0]
    180   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
    181   %1 = load float, float* %arrayidx1, align 4 ; p[1]
    182   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
    183   %2 = load float, float* %arrayidx2, align 4 ; p[2]
    184   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
    185   %3 = load float, float* %arrayidx3, align 4 ; p[3]
    186   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
    187   %4 = load float, float* %arrayidx4, align 4 ; p[4]
    188   %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
    189   %5 = load float, float* %arrayidx5, align 4 ; p[5]
    190   %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
    191   %6 = load float, float* %arrayidx6, align 4 ; p[6]
    192   %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
    193   %7 = load float, float* %arrayidx7, align 4 ; p[7]
    194   %mul = fmul fast float %0, %1 ; chain A begins: p[0]*p[1]
    195   %add = fadd fast float %mul, %7 ; A: ... + p[7]
    196   %mul8 = fmul fast float %5, %6 ; chain B begins: p[5]*p[6]
    197   %mul9 = fmul fast float %1, %2
    198   %add10 = fadd fast float %mul9, %add ; A: ... + p[1]*p[2]
    199   %mul11 = fmul fast float %3, %4
    200   %add12 = fadd fast float %mul11, %mul8 ; B: ... + p[3]*p[4]
    201   %mul13 = fmul fast float %1, %3
    202   %sub = fsub fast float %add10, %mul13 ; A: ... - p[1]*p[3]
    203   %mul14 = fmul fast float %4, %5
    204   %add15 = fadd fast float %mul14, %add12 ; B: ... + p[4]*p[5]; last value of chain B
    205   %mul16 = fmul fast float %2, %3
    206   %add17 = fadd fast float %mul16, %sub ; A: ... + p[2]*p[3]; last value of chain A
    207   store float %add17, float* %q, align 4 ; q[0] = chain A result
    208   %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
    209   store float %add15, float* %arrayidx19, align 4 ; q[1] = chain B result
    210   ret void
    211 }
    212 
    213 ; Single precision version of f3
    214 
    215 ; CHECK-LABEL: f5:
    216 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
    217 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
    218 ; CHECK: fmadd [[x]]
    219 ; CHECK: fmsub [[x]]
    220 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
    221 ; CHECK: str [[y]]
    222 
    223 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 { ; float variant: one fmul/fadd chain whose result is stored in a later block
    224 entry:
    225   %0 = load float, float* %p, align 4 ; p[0]
    226   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
    227   %1 = load float, float* %arrayidx1, align 4 ; p[1]
    228   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
    229   %2 = load float, float* %arrayidx2, align 4 ; p[2]
    230   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
    231   %3 = load float, float* %arrayidx3, align 4 ; p[3]
    232   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
    233   %4 = load float, float* %arrayidx4, align 4 ; p[4]
    234   %mul = fmul fast float %0, %1 ; chain begins: p[0]*p[1]
    235   %add = fadd fast float %mul, %4 ; ... + p[4]
    236   %mul5 = fmul fast float %1, %2
    237   %add6 = fadd fast float %mul5, %add ; ... + p[1]*p[2]
    238   %mul7 = fmul fast float %1, %3
    239   %sub = fsub fast float %add6, %mul7 ; ... - p[1]*p[3]
    240   %mul8 = fmul fast float %2, %3
    241   %add9 = fadd fast float %mul8, %sub ; ... + p[2]*p[3]; last value of the chain, used in if.end
    242   %cmp = fcmp oeq float %3, 0.000000e+00 ; p[3] == 0.0 selects the path that calls @g
    243   br i1 %cmp, label %if.then, label %if.end
    244 
    245 if.then:                                          ; preds = %entry
    246   tail call void bitcast (void (...)* @g to void ()*)() #2 ; call to the external @g while %add9 is still needed
    247   br label %if.end
    248 
    249 if.end:                                           ; preds = %if.then, %entry
    250   store float %add9, float* %q, align 4 ; q[0] = chain result computed in entry
    251   ret void
    252 }
    253 
    254 ; Test that regmask clobbering stops a chain sequence.
    255 
    256 ; CHECK-LABEL: f6:
    257 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
    258 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
    259 ; CHECK: fmadd [[x]]
    260 ; CHECK: fmsub [[x]]
    261 ; CHECK: fmadd d0, {{.*}}, [[x]]
    262 ; CHECK: bl hh
    263 ; CHECK: str d0
    264 
    265 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 { ; one fmul/fadd chain whose result is passed to a call
    266 entry:
    267   %0 = load double, double* %p, align 8 ; p[0]
    268   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
    269   %1 = load double, double* %arrayidx1, align 8 ; p[1]
    270   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
    271   %2 = load double, double* %arrayidx2, align 8 ; p[2]
    272   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
    273   %3 = load double, double* %arrayidx3, align 8 ; p[3]
    274   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
    275   %4 = load double, double* %arrayidx4, align 8 ; p[4]
    276   %mul = fmul fast double %0, %1 ; chain begins: p[0]*p[1]
    277   %add = fadd fast double %mul, %4 ; ... + p[4]
    278   %mul5 = fmul fast double %1, %2
    279   %add6 = fadd fast double %mul5, %add ; ... + p[1]*p[2]
    280   %mul7 = fmul fast double %1, %3
    281   %sub = fsub fast double %add6, %mul7 ; ... - p[1]*p[3]
    282   %mul8 = fmul fast double %2, %3
    283   %add9 = fadd fast double %mul8, %sub ; ... + p[2]*p[3]; last value of the chain
    284   %call = tail call double @hh(double %add9) #2 ; chain result is the sole argument to @hh
    285   store double %call, double* %q, align 8 ; q[0] = value returned by @hh
    286   ret void
    287 }
    288 
    289 declare double @hh(double) #1
    290 
    291 ; Check that we correctly deal with repeated operands.
    292 ; The following testcase creates:
    293 ;   %D1<def> = FADDDrr %D0<kill>, %D0
    294 ; We'll get a crash if we naively look at the first operand, remove it
    295 ; from the substitution list then look at the second operand.
    296 ; CHECK-LABEL: f7:
    297 ; CHECK: fmadd [[x:d[0-9]+]]
    298 ; CHECK: fadd d1, [[x]], [[x]]
    299 
    300 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 { ; one fmul/fadd chain whose result is then added to itself
    301 entry:
    302   %0 = load double, double* %p, align 8 ; p[0]
    303   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
    304   %1 = load double, double* %arrayidx1, align 8 ; p[1]
    305   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
    306   %2 = load double, double* %arrayidx2, align 8 ; p[2]
    307   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
    308   %3 = load double, double* %arrayidx3, align 8 ; p[3]
    309   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
    310   %4 = load double, double* %arrayidx4, align 8 ; p[4]
    311   %mul = fmul fast double %0, %1 ; chain begins: p[0]*p[1]
    312   %add = fadd fast double %mul, %4 ; ... + p[4]
    313   %mul5 = fmul fast double %1, %2
    314   %add6 = fadd fast double %mul5, %add ; ... + p[1]*p[2]
    315   %mul7 = fmul fast double %1, %3
    316   %sub = fsub fast double %add6, %mul7 ; ... - p[1]*p[3]
    317   %mul8 = fmul fast double %2, %3
    318   %add9 = fadd fast double %mul8, %sub ; ... + p[2]*p[3]; last value of the chain
    319   %add10 = fadd fast double %add9, %add9 ; same value used as both operands (the repeated-operand case)
    320   call void @hhh(double 0.0, double %add10) ; sum is the second argument, after the constant 0.0
    321   ret void
    322 }
    323 
    324 declare void @hhh(double, double)
    325 
    326 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
    327 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
    328 attributes #2 = { nounwind }
    329 
    330