Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -disable-post-ra < %s | FileCheck %s
      2 
      3 ; Incremental updates of the instruction depths should be enough for this test
      4 ; case, so repeat it with -machine-combiner-inc-threshold=0 and expect the same output.
      5 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \
      6 ; RUN:     -disable-post-ra -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s
      7 
      8 ; Verify that the first two adds are independent regardless of how the inputs are
      9 ; commuted. The destination registers are used as source registers for the third add.
     10 
     11 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
        ; Input is the serial chain ((x0 + x1) + x2) + x3. The expected code adds
        ; s0+s1 and s2+s3 independently and then sums the two partial results.
     12 ; CHECK-LABEL:   reassociate_adds1:
     13 ; CHECK:         fadd  s0, s0, s1
     14 ; CHECK-NEXT:    fadd  s1, s2, s3
     15 ; CHECK-NEXT:    fadd  s0, s0, s1
     16 ; CHECK-NEXT:    ret
     17   %sum01 = fadd float %x0, %x1
     18   %sum012 = fadd float %sum01, %x2
     19   %sum0123 = fadd float %sum012, %x3
     20   ret float %sum0123
     21 }
     22 
     23 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
        ; Second add takes the running sum as its right-hand operand:
        ; (x2 + (x0 + x1)) + x3. The expected code is still two independent adds
        ; followed by the add of their results.
     24 ; CHECK-LABEL:   reassociate_adds2:
     25 ; CHECK:         fadd  s0, s0, s1
     26 ; CHECK-NEXT:    fadd  s1, s2, s3
     27 ; CHECK-NEXT:    fadd  s0, s0, s1
     28 ; CHECK-NEXT:    ret
     29   %lhs = fadd float %x0, %x1
     30   %mid = fadd float %x2, %lhs
     31   %total = fadd float %mid, %x3
     32   ret float %total
     33 }
     34 
     35 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
        ; Last add is commuted: x3 + ((x0 + x1) + x2). The checks spell out the fadd
        ; mnemonic so that only the expected add instructions can satisfy them.
     36 ; CHECK-LABEL:   reassociate_adds3:
     37 ; CHECK:         fadd  s0, s0, s1
     38 ; CHECK-NEXT:    fadd  s1, s2, s3
     39 ; CHECK-NEXT:    fadd  s0, s0, s1
     40 ; CHECK-NEXT:    ret
     41   %t0 = fadd float %x0, %x1
     42   %t1 = fadd float %t0, %x2
     43   %t2 = fadd float %x3, %t1
     44   ret float %t2
     45 }
     46 
     47 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
        ; Both dependent adds are commuted: x3 + (x2 + (x0 + x1)). The checks spell
        ; out the fadd mnemonic so that only the expected add instructions can
        ; satisfy them.
     48 ; CHECK-LABEL:   reassociate_adds4:
     49 ; CHECK:         fadd  s0, s0, s1
     50 ; CHECK-NEXT:    fadd  s1, s2, s3
     51 ; CHECK-NEXT:    fadd  s0, s0, s1
     52 ; CHECK-NEXT:    ret
     53   %t0 = fadd float %x0, %x1
     54   %t1 = fadd float %x2, %t0
     55   %t2 = fadd float %x3, %t1
     56   ret float %t2
     57 }
     58 
     59 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
     60 ; produced because that would cost more compile time.
     61 
     62 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
        ; Serial chain of seven adds over eight inputs. The checked sequence pairs up
        ; some operands (s0+s1, s2+s3, s4+s5) but still folds in s6 and s7 with
        ; dependent adds, i.e. the tree is only partly rebalanced.
     63 ; CHECK-LABEL:   reassociate_adds5:
     64 ; CHECK:         fadd  s0, s0, s1
     65 ; CHECK-NEXT:    fadd  s1, s2, s3
     66 ; CHECK-NEXT:    fadd  s0, s0, s1
     67 ; CHECK-NEXT:    fadd  s1, s4, s5
     68 ; CHECK-NEXT:    fadd  s1, s1, s6
     69 ; CHECK-NEXT:    fadd  s0, s0, s1
     70 ; CHECK-NEXT:    fadd  s0, s0, s7
     71 ; CHECK-NEXT:    ret
     72   %acc1 = fadd float %x0, %x1
     73   %acc2 = fadd float %acc1, %x2
     74   %acc3 = fadd float %acc2, %x3
     75   %acc4 = fadd float %acc3, %x4
     76   %acc5 = fadd float %acc4, %x5
     77   %acc6 = fadd float %acc5, %x6
     78   %acc7 = fadd float %acc6, %x7
     79   ret float %acc7
     80 }
     81 
     82 ; Verify that we only need two associative operations to reassociate the operands.
     83 ; Also, we should reassociate such that the result of the high latency division
     84 ; is used by the final 'add' rather than reassociating the %x3 operand with the
     85 ; division. The latter reassociation would not improve anything.
     86 
     87 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
        ; x3 + (x2 + (x0 / x1)): the expected code adds s2 and s3 on their own and
        ; only the final add consumes the quotient.
     88 ; CHECK-LABEL:   reassociate_adds6:
     89 ; CHECK:         fdiv  s0, s0, s1
     90 ; CHECK-NEXT:    fadd  s1, s2, s3
     91 ; CHECK-NEXT:    fadd  s0, s0, s1
     92 ; CHECK-NEXT:    ret
     93   %quot = fdiv float %x0, %x1
     94   %partial = fadd float %x2, %quot
     95   %result = fadd float %x3, %partial
     96   ret float %result
     97 }
     98 
     99 ; Verify that scalar single-precision multiplies are reassociated.
    100 
    101 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
        ; x3 * (x2 * (x0 / x1)): the expected code multiplies s2 by s3 on its own and
        ; only the final multiply consumes the quotient.
    102 ; CHECK-LABEL:   reassociate_muls1:
    103 ; CHECK:         fdiv  s0, s0, s1
    104 ; CHECK-NEXT:    fmul  s1, s2, s3
    105 ; CHECK-NEXT:    fmul  s0, s0, s1
    106 ; CHECK-NEXT:    ret
    107   %quot = fdiv float %x0, %x1
    108   %partial = fmul float %x2, %quot
    109   %result = fmul float %x3, %partial
    110   ret float %result
    111 }
    112 
    113 ; Verify that scalar double-precision adds are reassociated.
    114 
    115 define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
        ; Double-precision form of x3 + (x2 + (x0 / x1)): d2+d3 is expected to be
        ; computed separately from the quotient and combined by the last add.
    116 ; CHECK-LABEL:   reassociate_adds_double:
    117 ; CHECK:         fdiv  d0, d0, d1
    118 ; CHECK-NEXT:    fadd  d1, d2, d3
    119 ; CHECK-NEXT:    fadd  d0, d0, d1
    120 ; CHECK-NEXT:    ret
    121   %quot = fdiv double %x0, %x1
    122   %partial = fadd double %x2, %quot
    123   %result = fadd double %x3, %partial
    124   ret double %result
    125 }
    126 
    127 ; Verify that scalar double-precision multiplies are reassociated.
    128 
    129 define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
        ; Double-precision form of x3 * (x2 * (x0 / x1)): d2*d3 is expected to be
        ; computed separately from the quotient and combined by the last multiply.
    130 ; CHECK-LABEL:   reassociate_muls_double:
    131 ; CHECK:         fdiv  d0, d0, d1
    132 ; CHECK-NEXT:    fmul  d1, d2, d3
    133 ; CHECK-NEXT:    fmul  d0, d0, d1
    134 ; CHECK-NEXT:    ret
    135   %quot = fdiv double %x0, %x1
    136   %partial = fmul double %x2, %quot
    137   %result = fmul double %x3, %partial
    138   ret double %result
    139 }
    140 
    141 ; Verify that we reassociate vector instructions too.
    142 
    143 define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
        ; <4 x float> serial chain ((x0 + x1) + x2) + x3. The expected code adds
        ; v0+v1 and v2+v3 independently and then sums the two partial results.
    144 ; CHECK-LABEL:   vector_reassociate_adds1:
    145 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
    146 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
    147 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
    148 ; CHECK-NEXT:    ret
    149   %sum01 = fadd <4 x float> %x0, %x1
    150   %sum012 = fadd <4 x float> %sum01, %x2
    151   %sum0123 = fadd <4 x float> %sum012, %x3
    152   ret <4 x float> %sum0123
    153 }
    154 
    155 define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
        ; <4 x float> chain with the second add commuted: (x2 + (x0 + x1)) + x3.
        ; The final check requires the return to follow the third add directly, so
        ; no extra instruction may appear after the reassociated sequence.
    156 ; CHECK-LABEL:   vector_reassociate_adds2:
    157 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
    158 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
    159 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
        ; CHECK-NEXT:    ret
    160   %t0 = fadd <4 x float> %x0, %x1
    161   %t1 = fadd <4 x float> %x2, %t0
    162   %t2 = fadd <4 x float> %t1, %x3
    163   ret <4 x float> %t2
    164 }
    165 
    166 define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
        ; <4 x float> chain with the last add commuted: x3 + ((x0 + x1) + x2).
        ; The final check requires the return to follow the third add directly, so
        ; no extra instruction may appear after the reassociated sequence.
    167 ; CHECK-LABEL:   vector_reassociate_adds3:
    168 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
    169 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
    170 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
        ; CHECK-NEXT:    ret
    171   %t0 = fadd <4 x float> %x0, %x1
    172   %t1 = fadd <4 x float> %t0, %x2
    173   %t2 = fadd <4 x float> %x3, %t1
    174   ret <4 x float> %t2
    175 }
    176 
    177 define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
        ; <4 x float> chain with both dependent adds commuted: x3 + (x2 + (x0 + x1)).
        ; The final check requires the return to follow the third add directly, so
        ; no extra instruction may appear after the reassociated sequence.
    178 ; CHECK-LABEL:   vector_reassociate_adds4:
    179 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
    180 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
    181 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
        ; CHECK-NEXT:    ret
    182   %t0 = fadd <4 x float> %x0, %x1
    183   %t1 = fadd <4 x float> %x2, %t0
    184   %t2 = fadd <4 x float> %x3, %t1
    185   ret <4 x float> %t2
    186 }
    187 ; Verify that 128-bit vector single-precision multiplies are reassociated.
    188 
    189 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
        ; x3 * (x2 * (x0 + x1)) on <4 x float>: the expected code multiplies v2 by v3
        ; on its own and only the final multiply consumes the sum.
    190 ; CHECK-LABEL:   reassociate_muls_v4f32:
    191 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
    192 ; CHECK-NEXT:    fmul  v1.4s, v2.4s, v3.4s
    193 ; CHECK-NEXT:    fmul  v0.4s, v0.4s, v1.4s
    194 ; CHECK-NEXT:    ret
    195   %sum = fadd <4 x float> %x0, %x1
    196   %partial = fmul <4 x float> %x2, %sum
    197   %result = fmul <4 x float> %x3, %partial
    198   ret <4 x float> %result
    199 }
    200 
    201 ; Verify that 128-bit vector double-precision multiplies are reassociated.
    202 
    203 define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
        ; x3 * (x2 * (x0 + x1)) on <2 x double>: the expected code multiplies v2 by
        ; v3 on its own and only the final multiply consumes the sum.
    204 ; CHECK-LABEL:   reassociate_muls_v2f64:
    205 ; CHECK:         fadd  v0.2d, v0.2d, v1.2d
    206 ; CHECK-NEXT:    fmul  v1.2d, v2.2d, v3.2d
    207 ; CHECK-NEXT:    fmul  v0.2d, v0.2d, v1.2d
    208 ; CHECK-NEXT:    ret
    209   %sum = fadd <2 x double> %x0, %x1
    210   %partial = fmul <2 x double> %x2, %sum
    211   %result = fmul <2 x double> %x3, %partial
    212   ret <2 x double> %result
    213 }
    214 
    215 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
    216 ; Verify that reassociation is not happening needlessly or wrongly.
    217 
    218 declare double @bar() ; Declared only (no body in this file); called by the two functions that follow to produce the addends.
    219 
    220 define double @reassociate_adds_from_calls() {
        ; All four addends come from calls to @bar and are summed as a serial chain.
        ; The checks expect the first three results to be copied into v8-v10 after
        ; each call, then two independent adds (d8+d9 and d10+d0) followed by the
        ; add of the two partial sums.
    221 ; CHECK-LABEL: reassociate_adds_from_calls:
    222 ; CHECK:       bl   bar
    223 ; CHECK-NEXT:  mov  v8.16b, v0.16b
    224 ; CHECK-NEXT:  bl   bar
    225 ; CHECK-NEXT:  mov  v9.16b, v0.16b
    226 ; CHECK-NEXT:  bl   bar
    227 ; CHECK-NEXT:  mov  v10.16b, v0.16b
    228 ; CHECK-NEXT:  bl   bar
    229 ; CHECK:       fadd d1, d8, d9
    230 ; CHECK-NEXT:  fadd d0, d10, d0
    231 ; CHECK-NEXT:  fadd d0, d1, d0
    232   %r0 = call double @bar()
    233   %r1 = call double @bar()
    234   %r2 = call double @bar()
    235   %r3 = call double @bar()
    236   %sum01 = fadd double %r0, %r1
    237   %sum012 = fadd double %sum01, %r2
    238   %sum0123 = fadd double %sum012, %r3
    239   ret double %sum0123
    240 }
    241 
    242 define double @already_reassociated() {
        ; The input is already in balanced form: (r0 + r1) + (r2 + r3). The checks
        ; expect the same instruction sequence as for the serial-chain variant,
        ; i.e. the two independent adds followed by the add of their results.
    243 ; CHECK-LABEL: already_reassociated:
    244 ; CHECK:       bl   bar
    245 ; CHECK-NEXT:  mov  v8.16b, v0.16b
    246 ; CHECK-NEXT:  bl   bar
    247 ; CHECK-NEXT:  mov  v9.16b, v0.16b
    248 ; CHECK-NEXT:  bl   bar
    249 ; CHECK-NEXT:  mov  v10.16b, v0.16b
    250 ; CHECK-NEXT:  bl   bar
    251 ; CHECK:       fadd d1, d8, d9
    252 ; CHECK-NEXT:  fadd d0, d10, d0
    253 ; CHECK-NEXT:  fadd d0, d1, d0
    254   %r0 = call double @bar()
    255   %r1 = call double @bar()
    256   %r2 = call double @bar()
    257   %r3 = call double @bar()
    258   %left = fadd double %r0, %r1
    259   %right = fadd double %r2, %r3
    260   %total = fadd double %left, %right
    261   ret double %total
    262 }
    263 
    264