; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD

; The following tests use the balance-fp-ops feature, and should be independent of
; the target cpu.

; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP

; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
;   * Force the pass to always perform register swapping even if the dest register is of the
;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
;   * Force the pass to ignore all hints it obtained from regalloc
;     (-aarch64-a57-fp-load-balancing-override), and run it twice, once where it always
;     hints even (=1) and once where it always hints odd (=2).
;   * Disable the machine schedulers (-enable-misched=false -enable-post-misched=false);
;     presumably this keeps the instruction order stable for the checks below.
;
; We then use regex magic to check that in the two cases the register allocation is
; different; this is what gives us the testing coverage and distinguishes cases where
; the pass has done some work versus accidental regalloc.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Non-overlapping groups - shouldn't need any changing at all.
; The first fmadd captures [[x]] with the forced parity; every later op of the
; chain and the final store must reuse that same register.

; CHECK-LABEL: f1:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[x]]
; CHECK: str [[x]]

define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  store double %add9, double* %q, align 8
  %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
  %5 = load double, double* %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
  %6 = load double, double* %arrayidx12, align 8
  %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
  %7 = load double, double* %arrayidx13, align 8
  %mul15 = fmul fast double %6, %7
  %mul16 = fmul fast double %0, %5
  %add17 = fadd fast double %mul16, %mul15
  %mul18 = fmul fast double %5, %6
  %add19 = fadd fast double %mul18, %add17
  %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
  store double %add19, double* %arrayidx20, align 8
  ret void
}

; Overlapping groups - coloring needed.
; Two interleaved chains are expected: [[x]] with the forced parity and [[y]]
; with the opposite one. The final store is a single stp on the BALFP runs and
; two str instructions (in either order) on the A53 runs.

; CHECK-LABEL: f2:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK-BALFP: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]

define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
  %5 = load double, double* %arrayidx5, align 8
  %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
  %6 = load double, double* %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
  %7 = load double, double* %arrayidx7, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %7
  %mul8 = fmul fast double %5, %6
  %mul9 = fmul fast double %1, %2
  %add10 = fadd fast double %mul9, %add
  %mul11 = fmul fast double %3, %4
  %add12 = fadd fast double %mul11, %mul8
  %mul13 = fmul fast double %1, %3
  %sub = fsub fast double %add10, %mul13
  %mul14 = fmul fast double %4, %5
  %add15 = fadd fast double %mul14, %add12
  %mul16 = fmul fast double %2, %3
  %add17 = fadd fast double %mul16, %sub
  store double %add17, double* %q, align 8
  %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
  store double %add15, double* %arrayidx19, align 8
  ret void
}

; Dest register is live on block exit - fixup needed.
; %add9 is computed in the entry block but only stored in if.end, after the
; conditional call to @g, so the last fmadd may land in a different register
; [[y]] that merely consumes [[x]].

; CHECK-LABEL: f3:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %cmp = fcmp oeq double %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store double %add9, double* %q, align 8
  ret void
}

declare void @g(...) #1

; Single precision version of f2.

; CHECK-LABEL: f4:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK-BALFP: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]

define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  %0 = load float, float* %p, align 4
  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
  %1 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
  %2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
  %3 = load float, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
  %4 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
  %5 = load float, float* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
  %6 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
  %7 = load float, float* %arrayidx7, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %7
  %mul8 = fmul fast float %5, %6
  %mul9 = fmul fast float %1, %2
  %add10 = fadd fast float %mul9, %add
  %mul11 = fmul fast float %3, %4
  %add12 = fadd fast float %mul11, %mul8
  %mul13 = fmul fast float %1, %3
  %sub = fsub fast float %add10, %mul13
  %mul14 = fmul fast float %4, %5
  %add15 = fadd fast float %mul14, %add12
  %mul16 = fmul fast float %2, %3
  %add17 = fadd fast float %mul16, %sub
  store float %add17, float* %q, align 4
  %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
  store float %add15, float* %arrayidx19, align 4
  ret void
}

; Single precision version of f3

; CHECK-LABEL: f5:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  %0 = load float, float* %p, align 4
  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
  %1 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
  %2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
  %3 = load float, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
  %4 = load float, float* %arrayidx4, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %4
  %mul5 = fmul fast float %1, %2
  %add6 = fadd fast float %mul5, %add
  %mul7 = fmul fast float %1, %3
  %sub = fsub fast float %add6, %mul7
  %mul8 = fmul fast float %2, %3
  %add9 = fadd fast float %mul8, %sub
  %cmp = fcmp oeq float %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store float %add9, float* %q, align 4
  ret void
}

; Test that regmask clobbering stops a chain sequence.
; f6: the first three ops of the chain share [[x]] (forced parity); the final
; fmadd is expected to write d0, the register in which %add9 is passed to @hh,
; and the value stored afterwards is the call's result in d0.

; CHECK-LABEL: f6:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd d0, {{.*}}, [[x]]
; CHECK: bl hh
; CHECK: str d0

define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %call = tail call double @hh(double %add9) #2
  store double %call, double* %q, align 8
  ret void
}

declare double @hh(double) #1

; Check that we correctly deal with repeated operands.
; The following testcase creates:
;   %D1<def> = FADDDrr %D0<kill>, %D0
; We'll get a crash if we naively look at the first operand, remove it
; from the substitution list then look at the second operand.
; CHECK-LABEL: f7:
; CHECK: fmadd [[x:d[0-9]+]]
; CHECK: fadd d1, [[x]], [[x]]

; %add10 = %add9 + %add9 uses the chain result for both operands and is the
; second double argument of @hhh, hence the d1 destination checked above.
define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %add10 = fadd fast double %add9, %add9
  call void @hhh(double 0.0, double %add10)
  ret void
}

declare void @hhh(double, double)

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }