Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
      2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
      3 
      4 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
      5 ; our test strategy is to:
      6 ;   * Force the pass to always perform register swapping even if the dest register is of the
      7 ;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
      8 ;   * Force the pass to ignore all hints it obtained from regalloc (-aarch64-a57-fp-load-balancing-override),
      9 ;     and run it twice, once where it always hints even (=1), and once where it always hints odd (=2).
     10 ;
     11 ; We then use regex magic to check that in the two cases the register allocation is
     12 ; different; this is what gives us the testing coverage and distinguishes cases where
     13 ; the pass has done some work versus accidental regalloc.
     14 
     15 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
     16 target triple = "aarch64"  ; the CPU itself is selected with -mcpu=cortex-a57 on the llc command lines
     17 
     18 ; Non-overlapping groups - shouldn't need any changing at all.
     19 
     20 ; CHECK-LABEL: f1:
     21 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
     22 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
     23 ; CHECK: fmadd [[x]]
     24 ; CHECK: fmsub [[x]]
     25 ; CHECK: fmadd [[x]]
     26 ; CHECK: str [[x]]
     27 
     28 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
     29 entry:  ; Two multiply-accumulate chains; in the IR the first is stored before the second one starts.
     30   %0 = load double* %p, align 8
     31   %arrayidx1 = getelementptr inbounds double* %p, i64 1
     32   %1 = load double* %arrayidx1, align 8
     33   %arrayidx2 = getelementptr inbounds double* %p, i64 2
     34   %2 = load double* %arrayidx2, align 8
     35   %arrayidx3 = getelementptr inbounds double* %p, i64 3
     36   %3 = load double* %arrayidx3, align 8
     37   %arrayidx4 = getelementptr inbounds double* %p, i64 4
     38   %4 = load double* %arrayidx4, align 8
     39   %mul = fmul fast double %0, %1  ; first chain: p[0]*p[1]
     40   %add = fadd fast double %mul, %4  ;   + p[4]
     41   %mul5 = fmul fast double %1, %2
     42   %add6 = fadd fast double %mul5, %add  ;   + p[1]*p[2]
     43   %mul7 = fmul fast double %1, %3
     44   %sub = fsub fast double %add6, %mul7  ;   - p[1]*p[3]
     45   %mul8 = fmul fast double %2, %3
     46   %add9 = fadd fast double %mul8, %sub  ;   + p[2]*p[3]
     47   store double %add9, double* %q, align 8  ; q[0] = first chain
     48   %arrayidx11 = getelementptr inbounds double* %p, i64 5
     49   %5 = load double* %arrayidx11, align 8
     50   %arrayidx12 = getelementptr inbounds double* %p, i64 6
     51   %6 = load double* %arrayidx12, align 8
     52   %arrayidx13 = getelementptr inbounds double* %p, i64 7
     53   %7 = load double* %arrayidx13, align 8
     54   %mul15 = fmul fast double %6, %7  ; second chain: p[6]*p[7]
     55   %mul16 = fmul fast double %0, %5
     56   %add17 = fadd fast double %mul16, %mul15  ;   + p[0]*p[5]
     57   %mul18 = fmul fast double %5, %6
     58   %add19 = fadd fast double %mul18, %add17  ;   + p[5]*p[6]
     59   %arrayidx20 = getelementptr inbounds double* %q, i64 1
     60   store double %add19, double* %arrayidx20, align 8  ; q[1] = second chain
     61   ret void
     62 }
     63 
     64 ; Overlapping groups - coloring needed.
     65 
     66 ; CHECK-LABEL: f2:
     67 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
     68 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
     69 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
     70 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
     71 ; CHECK: fmadd [[x]]
     72 ; CHECK: fmadd [[y]]
     73 ; CHECK: fmsub [[x]]
     74 ; CHECK: fmadd [[y]]
     75 ; CHECK: fmadd [[x]]
     76 ; CHECK: stp [[x]], [[y]]
     77 
     78 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
     79 entry:  ; Two multiply-accumulate chains whose instructions are interleaved in the IR; both are stored at the end.
     80   %0 = load double* %p, align 8
     81   %arrayidx1 = getelementptr inbounds double* %p, i64 1
     82   %1 = load double* %arrayidx1, align 8
     83   %arrayidx2 = getelementptr inbounds double* %p, i64 2
     84   %2 = load double* %arrayidx2, align 8
     85   %arrayidx3 = getelementptr inbounds double* %p, i64 3
     86   %3 = load double* %arrayidx3, align 8
     87   %arrayidx4 = getelementptr inbounds double* %p, i64 4
     88   %4 = load double* %arrayidx4, align 8
     89   %arrayidx5 = getelementptr inbounds double* %p, i64 5
     90   %5 = load double* %arrayidx5, align 8
     91   %arrayidx6 = getelementptr inbounds double* %p, i64 6
     92   %6 = load double* %arrayidx6, align 8
     93   %arrayidx7 = getelementptr inbounds double* %p, i64 7
     94   %7 = load double* %arrayidx7, align 8
     95   %mul = fmul fast double %0, %1  ; chain A: p[0]*p[1]
     96   %add = fadd fast double %mul, %7  ; chain A: + p[7]
     97   %mul8 = fmul fast double %5, %6  ; chain B: p[5]*p[6]
     98   %mul9 = fmul fast double %1, %2
     99   %add10 = fadd fast double %mul9, %add  ; chain A: + p[1]*p[2]
    100   %mul11 = fmul fast double %3, %4
    101   %add12 = fadd fast double %mul11, %mul8  ; chain B: + p[3]*p[4]
    102   %mul13 = fmul fast double %1, %3
    103   %sub = fsub fast double %add10, %mul13  ; chain A: - p[1]*p[3]
    104   %mul14 = fmul fast double %4, %5
    105   %add15 = fadd fast double %mul14, %add12  ; chain B: + p[4]*p[5]
    106   %mul16 = fmul fast double %2, %3
    107   %add17 = fadd fast double %mul16, %sub  ; chain A: + p[2]*p[3]
    108   store double %add17, double* %q, align 8  ; q[0] = chain A
    109   %arrayidx19 = getelementptr inbounds double* %q, i64 1
    110   store double %add15, double* %arrayidx19, align 8  ; q[1] = chain B
    111   ret void
    112 }
    113 
    114 ; Dest register is live on block exit - fixup needed.
    115 
    116 ; CHECK-LABEL: f3:
    117 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
    118 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
    119 ; CHECK: fmadd [[x]]
    120 ; CHECK: fmsub [[x]]
    121 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
    122 ; CHECK: str [[y]]
    123 
    124 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
    125 entry:  ; One multiply-accumulate chain computed here; its result is only stored in %if.end, after an optional call.
    126   %0 = load double* %p, align 8
    127   %arrayidx1 = getelementptr inbounds double* %p, i64 1
    128   %1 = load double* %arrayidx1, align 8
    129   %arrayidx2 = getelementptr inbounds double* %p, i64 2
    130   %2 = load double* %arrayidx2, align 8
    131   %arrayidx3 = getelementptr inbounds double* %p, i64 3
    132   %3 = load double* %arrayidx3, align 8
    133   %arrayidx4 = getelementptr inbounds double* %p, i64 4
    134   %4 = load double* %arrayidx4, align 8
    135   %mul = fmul fast double %0, %1  ; chain: p[0]*p[1]
    136   %add = fadd fast double %mul, %4  ;   + p[4]
    137   %mul5 = fmul fast double %1, %2
    138   %add6 = fadd fast double %mul5, %add  ;   + p[1]*p[2]
    139   %mul7 = fmul fast double %1, %3
    140   %sub = fsub fast double %add6, %mul7  ;   - p[1]*p[3]
    141   %mul8 = fmul fast double %2, %3
    142   %add9 = fadd fast double %mul8, %sub  ;   + p[2]*p[3]; this value is used outside the entry block
    143   %cmp = fcmp oeq double %3, 0.000000e+00  ; p[3] == 0.0 decides whether @g is called
    144   br i1 %cmp, label %if.then, label %if.end
    145 
    146 if.then:                                          ; preds = %entry
    147   tail call void bitcast (void (...)* @g to void ()*)() #2
    148   br label %if.end
    149 
    150 if.end:                                           ; preds = %if.then, %entry
    151   store double %add9, double* %q, align 8  ; q[0] = chain result computed in %entry
    152   ret void
    153 }
    154 
    155 declare void @g(...) #1  ; external; f3 and f5 call it through a bitcast to void ()
    156 
    157 ; Single precision version of f2.
    158 
    159 ; CHECK-LABEL: f4:
    160 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
    161 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
    162 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
    163 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
    164 ; CHECK: fmadd [[x]]
    165 ; CHECK: fmadd [[y]]
    166 ; CHECK: fmsub [[x]]
    167 ; CHECK: fmadd [[y]]
    168 ; CHECK: fmadd [[x]]
    169 ; CHECK: stp [[x]], [[y]]
    170 
    171 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
    172 entry:  ; Same two interleaved chains as f2, using float loads, arithmetic and stores.
    173   %0 = load float* %p, align 4
    174   %arrayidx1 = getelementptr inbounds float* %p, i64 1
    175   %1 = load float* %arrayidx1, align 4
    176   %arrayidx2 = getelementptr inbounds float* %p, i64 2
    177   %2 = load float* %arrayidx2, align 4
    178   %arrayidx3 = getelementptr inbounds float* %p, i64 3
    179   %3 = load float* %arrayidx3, align 4
    180   %arrayidx4 = getelementptr inbounds float* %p, i64 4
    181   %4 = load float* %arrayidx4, align 4
    182   %arrayidx5 = getelementptr inbounds float* %p, i64 5
    183   %5 = load float* %arrayidx5, align 4
    184   %arrayidx6 = getelementptr inbounds float* %p, i64 6
    185   %6 = load float* %arrayidx6, align 4
    186   %arrayidx7 = getelementptr inbounds float* %p, i64 7
    187   %7 = load float* %arrayidx7, align 4
    188   %mul = fmul fast float %0, %1  ; chain A: p[0]*p[1]
    189   %add = fadd fast float %mul, %7  ; chain A: + p[7]
    190   %mul8 = fmul fast float %5, %6  ; chain B: p[5]*p[6]
    191   %mul9 = fmul fast float %1, %2
    192   %add10 = fadd fast float %mul9, %add  ; chain A: + p[1]*p[2]
    193   %mul11 = fmul fast float %3, %4
    194   %add12 = fadd fast float %mul11, %mul8  ; chain B: + p[3]*p[4]
    195   %mul13 = fmul fast float %1, %3
    196   %sub = fsub fast float %add10, %mul13  ; chain A: - p[1]*p[3]
    197   %mul14 = fmul fast float %4, %5
    198   %add15 = fadd fast float %mul14, %add12  ; chain B: + p[4]*p[5]
    199   %mul16 = fmul fast float %2, %3
    200   %add17 = fadd fast float %mul16, %sub  ; chain A: + p[2]*p[3]
    201   store float %add17, float* %q, align 4  ; q[0] = chain A
    202   %arrayidx19 = getelementptr inbounds float* %q, i64 1
    203   store float %add15, float* %arrayidx19, align 4  ; q[1] = chain B
    204   ret void
    205 }
    206 
    207 ; Single precision version of f3
    208 
    209 ; CHECK-LABEL: f5:
    210 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
    211 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
    212 ; CHECK: fmadd [[x]]
    213 ; CHECK: fmsub [[x]]
    214 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
    215 ; CHECK: str [[y]]
    216 
    217 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
    218 entry:  ; Same shape as f3 with float: one chain computed here, stored in %if.end after an optional call.
    219   %0 = load float* %p, align 4
    220   %arrayidx1 = getelementptr inbounds float* %p, i64 1
    221   %1 = load float* %arrayidx1, align 4
    222   %arrayidx2 = getelementptr inbounds float* %p, i64 2
    223   %2 = load float* %arrayidx2, align 4
    224   %arrayidx3 = getelementptr inbounds float* %p, i64 3
    225   %3 = load float* %arrayidx3, align 4
    226   %arrayidx4 = getelementptr inbounds float* %p, i64 4
    227   %4 = load float* %arrayidx4, align 4
    228   %mul = fmul fast float %0, %1  ; chain: p[0]*p[1]
    229   %add = fadd fast float %mul, %4  ;   + p[4]
    230   %mul5 = fmul fast float %1, %2
    231   %add6 = fadd fast float %mul5, %add  ;   + p[1]*p[2]
    232   %mul7 = fmul fast float %1, %3
    233   %sub = fsub fast float %add6, %mul7  ;   - p[1]*p[3]
    234   %mul8 = fmul fast float %2, %3
    235   %add9 = fadd fast float %mul8, %sub  ;   + p[2]*p[3]; this value is used outside the entry block
    236   %cmp = fcmp oeq float %3, 0.000000e+00  ; p[3] == 0.0 decides whether @g is called
    237   br i1 %cmp, label %if.then, label %if.end
    238 
    239 if.then:                                          ; preds = %entry
    240   tail call void bitcast (void (...)* @g to void ()*)() #2
    241   br label %if.end
    242 
    243 if.end:                                           ; preds = %if.then, %entry
    244   store float %add9, float* %q, align 4  ; q[0] = chain result computed in %entry
    245   ret void
    246 }
    247 
    248 ; Test that regmask clobbering stops a chain sequence.
    249 
    250 ; CHECK-LABEL: f6:
    251 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
    252 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
    253 ; CHECK: fmadd [[x]]
    254 ; CHECK: fmsub [[x]]
    255 ; CHECK: fmadd d0, {{.*}}, [[x]]
    256 ; CHECK: bl hh
    257 ; CHECK: str d0
    258 
    259 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
    260 entry:  ; One multiply-accumulate chain whose result is passed to @hh; the value @hh returns is what gets stored.
    261   %0 = load double* %p, align 8
    262   %arrayidx1 = getelementptr inbounds double* %p, i64 1
    263   %1 = load double* %arrayidx1, align 8
    264   %arrayidx2 = getelementptr inbounds double* %p, i64 2
    265   %2 = load double* %arrayidx2, align 8
    266   %arrayidx3 = getelementptr inbounds double* %p, i64 3
    267   %3 = load double* %arrayidx3, align 8
    268   %arrayidx4 = getelementptr inbounds double* %p, i64 4
    269   %4 = load double* %arrayidx4, align 8
    270   %mul = fmul fast double %0, %1  ; chain: p[0]*p[1]
    271   %add = fadd fast double %mul, %4  ;   + p[4]
    272   %mul5 = fmul fast double %1, %2
    273   %add6 = fadd fast double %mul5, %add  ;   + p[1]*p[2]
    274   %mul7 = fmul fast double %1, %3
    275   %sub = fsub fast double %add6, %mul7  ;   - p[1]*p[3]
    276   %mul8 = fmul fast double %2, %3
    277   %add9 = fadd fast double %mul8, %sub  ;   + p[2]*p[3]
    278   %call = tail call double @hh(double %add9) #2  ; chain result is the sole argument of the call
    279   store double %call, double* %q, align 8  ; q[0] = return value of @hh, not the chain result
    280   ret void
    281 }
    282 
    283 declare double @hh(double) #1  ; external; f6 passes its chain result here and stores the return value
    284 
    285 ; Check that we correctly deal with repeated operands.
    286 ; The following testcase creates:
    287 ;   %D1<def> = FADDDrr %D0<kill>, %D0
    288 ; We'll get a crash if we naively look at the first operand, remove it
    289 ; from the substitution list then look at the second operand.
    290 ; CHECK-LABEL: f7:
    291 ; CHECK: fmadd [[x:d[0-9]+]]
    292 ; CHECK: fadd d1, [[x]], [[x]]
    293 
    294 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
    295 entry:  ; One multiply-accumulate chain whose result is then added to itself and passed to @hhh.
    296   %0 = load double* %p, align 8
    297   %arrayidx1 = getelementptr inbounds double* %p, i64 1
    298   %1 = load double* %arrayidx1, align 8
    299   %arrayidx2 = getelementptr inbounds double* %p, i64 2
    300   %2 = load double* %arrayidx2, align 8
    301   %arrayidx3 = getelementptr inbounds double* %p, i64 3
    302   %3 = load double* %arrayidx3, align 8
    303   %arrayidx4 = getelementptr inbounds double* %p, i64 4
    304   %4 = load double* %arrayidx4, align 8
    305   %mul = fmul fast double %0, %1  ; chain: p[0]*p[1]
    306   %add = fadd fast double %mul, %4  ;   + p[4]
    307   %mul5 = fmul fast double %1, %2
    308   %add6 = fadd fast double %mul5, %add  ;   + p[1]*p[2]
    309   %mul7 = fmul fast double %1, %3
    310   %sub = fsub fast double %add6, %mul7  ;   - p[1]*p[3]
    311   %mul8 = fmul fast double %2, %3
    312   %add9 = fadd fast double %mul8, %sub  ;   + p[2]*p[3]
    313   %add10 = fadd fast double %add9, %add9  ; the chain result is both operands: the repeated-operand case
    314   call void @hhh(double 0.0, double %add10)  ; sum is the second argument; the fadd check above expects it in d1
    315   ret void
    316 }
    317 
    318 declare void @hhh(double, double)  ; external callee taking two doubles; only f7 calls it
    319 ; Attribute groups: #0 is on every define above, #1 on the @g and @hh declarations, #2 on the calls to @g and @hh.
    320 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
    321 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
    322 attributes #2 = { nounwind }
    323 
    324