; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD

; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
;   * Force the pass to always perform register swapping even if the dest register is of the
;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
;   * Force the pass to ignore all hints it obtained from regalloc
;     (-aarch64-a57-fp-load-balancing-override), and run it twice: once with override=1, whose
;     output is checked against the even-register patterns, and once with override=2, whose
;     output is checked against the odd-register patterns.
;
; We then use regex magic to check that in the two cases the register allocation is
; different; this is what gives us the testing coverage and distinguishes cases where
; the pass has done some work versus accidental regalloc.
;
; In the patterns below, a register name ending in one of 0,2,4,6,8 is "even" and one ending
; in 1,3,5,7,9 is "odd"; the common-prefix patterns then require the captured register to be
; reused by the following instructions.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Non-overlapping groups - shouldn't need any changing at all.
; f1 expectations: the first fmadd's leading register operand is captured as x with the
; parity required by the active prefix; the following fmadd/fmsub/fmadd and the final str
; must all name that same register.

; CHECK-LABEL: f1:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[x]]
; CHECK: str [[x]]

define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[4].
  %0 = load double* %p, align 8
  %arrayidx1 = getelementptr inbounds double* %p, i64 1
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %p, i64 2
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %p, i64 3
  %3 = load double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double* %p, i64 4
  %4 = load double* %arrayidx4, align 8
  ; First group: q[0] = p0*p1 + p4 + p1*p2 - p1*p3 + p2*p3, each step feeding the next.
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  store double %add9, double* %q, align 8
  ; Second group: q[1] = p0*p5 + p6*p7 + p5*p6. In the IR it begins only after the first
  ; group's result has been stored, so the two groups do not interleave.
  %arrayidx11 = getelementptr inbounds double* %p, i64 5
  %5 = load double* %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds double* %p, i64 6
  %6 = load double* %arrayidx12, align 8
  %arrayidx13 = getelementptr inbounds double* %p, i64 7
  %7 = load double* %arrayidx13, align 8
  %mul15 = fmul fast double %6, %7
  %mul16 = fmul fast double %0, %5
  %add17 = fadd fast double %mul16, %mul15
  %mul18 = fmul fast double %5, %6
  %add19 = fadd fast double %mul18, %add17
  %arrayidx20 = getelementptr inbounds double* %q, i64 1
  store double %add19, double* %arrayidx20, align 8
  ret void
}

; Overlapping groups - coloring needed.
; f2 expectations: two registers are captured with opposite parity - x from the first fmadd
; and y from the first fmul. The remaining instructions must alternate between the two
; registers as listed, and the final stp must store x and y together.

; CHECK-LABEL: f2:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK: stp [[x]], [[y]]

define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[7].
  %0 = load double* %p, align 8
  %arrayidx1 = getelementptr inbounds double* %p, i64 1
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %p, i64 2
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %p, i64 3
  %3 = load double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double* %p, i64 4
  %4 = load double* %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double* %p, i64 5
  %5 = load double* %arrayidx5, align 8
  %arrayidx6 = getelementptr inbounds double* %p, i64 6
  %6 = load double* %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds double* %p, i64 7
  %7 = load double* %arrayidx7, align 8
  ; Two dependency chains are interleaved below:
  ;   chain A: %mul -> %add -> %add10 -> %sub -> %add17   (stored to q[0])
  ;   chain B: %mul8 -> %add12 -> %add15                  (stored to q[1])
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %7
  %mul8 = fmul fast double %5, %6
  %mul9 = fmul fast double %1, %2
  %add10 = fadd fast double %mul9, %add
  %mul11 = fmul fast double %3, %4
  %add12 = fadd fast double %mul11, %mul8
  %mul13 = fmul fast double %1, %3
  %sub = fsub fast double %add10, %mul13
  %mul14 = fmul fast double %4, %5
  %add15 = fadd fast double %mul14, %add12
  %mul16 = fmul fast double %2, %3
  %add17 = fadd fast double %mul16, %sub
  store double %add17, double* %q, align 8
  %arrayidx19 = getelementptr inbounds double* %q, i64 1
  store double %add15, double* %arrayidx19, align 8
  ret void
}

; Dest register is live on block exit - fixup needed.
; f3 expectations: the chain uses the parity-constrained register x up to the fmsub, but the
; last fmadd is expected to name a separately captured register y as its leading operand
; (with x as its trailing operand), and the store must use y.

; CHECK-LABEL: f3:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[4].
  %0 = load double* %p, align 8
  %arrayidx1 = getelementptr inbounds double* %p, i64 1
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %p, i64 2
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %p, i64 3
  %3 = load double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double* %p, i64 4
  %4 = load double* %arrayidx4, align 8
  ; Single chain: p0*p1 + p4 + p1*p2 - p1*p3 + p2*p3.
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  ; %add9 is not consumed in this block; it is stored in if.end, so it is live across the
  ; branch (and across the call in if.then).
  %cmp = fcmp oeq double %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end: ; preds = %if.then, %entry
  store double %add9, double* %q, align 8
  ret void
}

; External callee used by f3 and f5; called through a bitcast to a no-argument signature.
declare void @g(...) #1

; Single precision version of f2.
; f4 expectations: same shape as f2, but the captured registers are s-registers. x and y must
; have opposite parity and the instruction sequence must alternate between them as listed.

; CHECK-LABEL: f4:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK: stp [[x]], [[y]]

define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[7].
  %0 = load float* %p, align 4
  %arrayidx1 = getelementptr inbounds float* %p, i64 1
  %1 = load float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float* %p, i64 2
  %2 = load float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float* %p, i64 3
  %3 = load float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float* %p, i64 4
  %4 = load float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float* %p, i64 5
  %5 = load float* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float* %p, i64 6
  %6 = load float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float* %p, i64 7
  %7 = load float* %arrayidx7, align 4
  ; Two dependency chains are interleaved below:
  ;   chain A: %mul -> %add -> %add10 -> %sub -> %add17   (stored to q[0])
  ;   chain B: %mul8 -> %add12 -> %add15                  (stored to q[1])
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %7
  %mul8 = fmul fast float %5, %6
  %mul9 = fmul fast float %1, %2
  %add10 = fadd fast float %mul9, %add
  %mul11 = fmul fast float %3, %4
  %add12 = fadd fast float %mul11, %mul8
  %mul13 = fmul fast float %1, %3
  %sub = fsub fast float %add10, %mul13
  %mul14 = fmul fast float %4, %5
  %add15 = fadd fast float %mul14, %add12
  %mul16 = fmul fast float %2, %3
  %add17 = fadd fast float %mul16, %sub
  store float %add17, float* %q, align 4
  %arrayidx19 = getelementptr inbounds float* %q, i64 1
  store float %add15, float* %arrayidx19, align 4
  ret void
}

; Single precision version of f3

; f5 expectations: same shape as f3 with s-registers - the chain stays in x up to the fmsub,
; the last fmadd names a separately captured y first and x last, and the store uses y.

; CHECK-LABEL: f5:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[4].
  %0 = load float* %p, align 4
  %arrayidx1 = getelementptr inbounds float* %p, i64 1
  %1 = load float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float* %p, i64 2
  %2 = load float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float* %p, i64 3
  %3 = load float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float* %p, i64 4
  %4 = load float* %arrayidx4, align 4
  ; Single chain: p0*p1 + p4 + p1*p2 - p1*p3 + p2*p3.
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %4
  %mul5 = fmul fast float %1, %2
  %add6 = fadd fast float %mul5, %add
  %mul7 = fmul fast float %1, %3
  %sub = fsub fast float %add6, %mul7
  %mul8 = fmul fast float %2, %3
  %add9 = fadd fast float %mul8, %sub
  ; %add9 is stored only in if.end, so it is live across the branch and the call in if.then.
  %cmp = fcmp oeq float %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end: ; preds = %if.then, %entry
  store float %add9, float* %q, align 4
  ret void
}

; Test that regmask clobbering stops a chain sequence.
; f6 expectations: the chain stays in the parity-constrained register x up to the fmsub; the
; last fmadd must name d0 first and x last, followed by the call to hh and a store of d0.

; CHECK-LABEL: f6:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd d0, {{.*}}, [[x]]
; CHECK: bl hh
; CHECK: str d0

define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[4].
  %0 = load double* %p, align 8
  %arrayidx1 = getelementptr inbounds double* %p, i64 1
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %p, i64 2
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %p, i64 3
  %3 = load double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double* %p, i64 4
  %4 = load double* %arrayidx4, align 8
  ; Single chain: p0*p1 + p4 + p1*p2 - p1*p3 + p2*p3.
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  ; The chain result is passed straight to @hh and the call's return value is what gets stored.
  %call = tail call double @hh(double %add9) #2
  store double %call, double* %q, align 8
  ret void
}

; External callee used by f6.
declare double @hh(double) #1

; Check that we correctly deal with repeated operands.
; The following testcase creates:
;   %D1<def> = FADDDrr %D0<kill>, %D0
; We'll get a crash if we naively look at the first operand, remove it
; from the substitution list then look at the second operand.
; f7 expectations: some fmadd's leading register operand is captured as x (no parity
; constraint here), and a later fadd must name d1 first and use x for both source operands.
; The label directive anchors these patterns to f7's own output, matching f1-f6.

; CHECK-LABEL: f7:
; CHECK: fmadd [[x:d[0-9]+]]
; CHECK: fadd d1, [[x]], [[x]]

define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  ; Load p[0] .. p[4].
  %0 = load double* %p, align 8
  %arrayidx1 = getelementptr inbounds double* %p, i64 1
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %p, i64 2
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %p, i64 3
  %3 = load double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double* %p, i64 4
  %4 = load double* %arrayidx4, align 8
  ; Single chain: p0*p1 + p4 + p1*p2 - p1*p3 + p2*p3.
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  ; The chain result is used as BOTH operands of the same fadd - the repeated-operand case.
  %add10 = fadd fast double %add9, %add9
  ; %add10 is passed as the second argument, after a constant first argument.
  call void @hhh(double 0.0, double %add10)
  ret void
}

; External callee used by f7.
declare void @hhh(double, double)

; #0: attributes of the test functions themselves (fast-math style flags, nounwind).
; #1: attributes of the external declarations @g and @hh.
; #2: attributes applied at the call sites of @g and @hh.
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }