; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD

; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
;   * Force the pass to always perform register swapping even if the dest register is of the
;     correct color already (-force-all)
;   * Force the pass to ignore all hints it obtained from regalloc (-deterministic-balance),
;     and run it twice, once where it always hints odd, and once where it always hints even.
;
; We then use regex magic to check that in the two cases the register allocation is
; different; this is what gives us the testing coverage and distinguishes cases where
; the pass has done some work versus accidental regalloc.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Non-overlapping groups - shouldn't need any changing at all.

; CHECK-LABEL: f1:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[x]]
; CHECK: str [[x]]

define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  store double %add9, double* %q, align 8
  %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
  %5 = load double, double* %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
  %6 = load double, double* %arrayidx12, align 8
  %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
  %7 = load double, double* %arrayidx13, align 8
  %mul15 = fmul fast double %6, %7
  %mul16 = fmul fast double %0, %5
  %add17 = fadd fast double %mul16, %mul15
  %mul18 = fmul fast double %5, %6
  %add19 = fadd fast double %mul18, %add17
  %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
  store double %add19, double* %arrayidx20, align 8
  ret void
}

; Overlapping groups - coloring needed.
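; f2 interleaves two independent multiply-accumulate chains (one result is stored
; to q[0], the other to q[1]), so both accumulators are live at the same time and
; the overlapping chains must be given different colors. The [[x]] and [[y]]
; captures below use opposite parities, so the checks only pass if the two chains
; end up on different register banks.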

; CHECK-LABEL: f2:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK-A57: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]

define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
  %5 = load double, double* %arrayidx5, align 8
  %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
  %6 = load double, double* %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
  %7 = load double, double* %arrayidx7, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %7
  %mul8 = fmul fast double %5, %6
  %mul9 = fmul fast double %1, %2
  %add10 = fadd fast double %mul9, %add
  %mul11 = fmul fast double %3, %4
  %add12 = fadd fast double %mul11, %mul8
  %mul13 = fmul fast double %1, %3
  %sub = fsub fast double %add10, %mul13
  %mul14 = fmul fast double %4, %5
  %add15 = fadd fast double %mul14, %add12
  %mul16 = fmul fast double %2, %3
  %add17 = fadd fast double %mul16, %sub
  store double %add17, double* %q, align 8
  %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
  store double %add15, double* %arrayidx19, align 8
  ret void
}

; Dest register is live on block exit - fixup needed.
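; In f3 the final accumulator (%add9) is not stored until %if.end, after a
; conditional call, so its value is live out of the entry block. The checks
; below therefore pin a parity only on the intermediate chain ([[x]]) and let
; the final result land in any register [[y]], exercising the live-out fixup
; path mentioned above.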

; CHECK-LABEL: f3:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %cmp = fcmp oeq double %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store double %add9, double* %q, align 8
  ret void
}

declare void @g(...) #1

; Single precision version of f2.

; CHECK-LABEL: f4:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK-A57: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]

define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  %0 = load float, float* %p, align 4
  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
  %1 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
  %2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
  %3 = load float, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
  %4 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
  %5 = load float, float* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
  %6 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
  %7 = load float, float* %arrayidx7, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %7
  %mul8 = fmul fast float %5, %6
  %mul9 = fmul fast float %1, %2
  %add10 = fadd fast float %mul9, %add
  %mul11 = fmul fast float %3, %4
  %add12 = fadd fast float %mul11, %mul8
  %mul13 = fmul fast float %1, %3
  %sub = fsub fast float %add10, %mul13
  %mul14 = fmul fast float %4, %5
  %add15 = fadd fast float %mul14, %add12
  %mul16 = fmul fast float %2, %3
  %add17 = fadd fast float %mul16, %sub
  store float %add17, float* %q, align 4
  %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
  store float %add15, float* %arrayidx19, align 4
  ret void
}

; Single precision version of f3.

; CHECK-LABEL: f5:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  %0 = load float, float* %p, align 4
  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
  %1 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
  %2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
  %3 = load float, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
  %4 = load float, float* %arrayidx4, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %4
  %mul5 = fmul fast float %1, %2
  %add6 = fadd fast float %mul5, %add
  %mul7 = fmul fast float %1, %3
  %sub = fsub fast float %add6, %mul7
  %mul8 = fmul fast float %2, %3
  %add9 = fadd fast float %mul8, %sub
  %cmp = fcmp oeq float %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store float %add9, float* %q, align 4
  ret void
}

; Test that regmask clobbering stops a chain sequence.

; CHECK-LABEL: f6:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd d0, {{.*}}, [[x]]
; CHECK: bl hh
; CHECK: str d0

define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %call = tail call double @hh(double %add9) #2
  store double %call, double* %q, align 8
  ret void
}

declare double @hh(double) #1

; Check that we correctly deal with repeated operands.
; The following testcase creates:
;   %D1<def> = FADDDrr %D0<kill>, %D0
; We'll get a crash if we naively look at the first operand, remove it
; from the substitution list then look at the second operand.
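; In f7 the repeated operand comes from %add10 = fadd fast double %add9, %add9:
; the chain result is added to itself, so the FADDDrr reads the same register
; twice. The check below requires both source operands of the fadd to match the
; (possibly renamed) chain result [[x]].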

; CHECK: fmadd [[x:d[0-9]+]]
; CHECK: fadd d1, [[x]], [[x]]

define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %add10 = fadd fast double %add9, %add9
  call void @hhh(double 0.0, double %add10)
  ret void
}

declare void @hhh(double, double)

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }