; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s
; RUN: opt -disable-output -debugify < %s

declare void @error(i32 %i, i32 %a, i32 %b)

define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; Test a chain of ifs, where the block guarded by the if is error handling code
; that is not expected to run.
; CHECK-LABEL: test_ifchains:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %else1
; CHECK-NOT: .p2align
; CHECK: %else2
; CHECK-NOT: .p2align
; CHECK: %else3
; CHECK-NOT: .p2align
; CHECK: %else4
; CHECK-NOT: .p2align
; CHECK: %exit
; CHECK: %then1
; CHECK: %then2
; CHECK: %then3
; CHECK: %then4
; CHECK: %then5

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !0

then1:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else1

else1:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !0

then2:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else2

else2:
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %then3, label %else3, !prof !0

then3:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else3

else3:
  %gep4 = getelementptr i32, i32* %a, i32 4
  %val4 = load i32, i32* %gep4
  %cond4 = icmp ugt i32 %val4, 4
  br i1 %cond4, label %then4, label %else4, !prof !0

then4:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else4

else4:
  %gep5 = getelementptr i32, i32* %a, i32 3
  %val5 = load i32, i32* %gep5
  %cond5 = icmp ugt i32 %val5, 3
  br i1 %cond5, label %then5, label %exit, !prof !0

then5:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %exit

exit:
  ret i32 %b
}

define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; Check that we sink cold loop blocks after the hot loop body.
; CHECK-LABEL: test_loop_cold_blocks:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %unlikely1
; CHECK-NOT: .p2align
; CHECK: %unlikely2
; CHECK: .p2align
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %exit

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
  %unlikelycond1 = icmp slt i32 %base, 42
  br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0

unlikely1:
  call void @error(i32 %i, i32 1, i32 %base)
  br label %body2

body2:
  %unlikelycond2 = icmp sgt i32 %base, 21
  br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0

unlikely2:
  call void @error(i32 %i, i32 2, i32 %base)
  br label %body3

body3:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

!0 = !{!"branch_weights", i32 4, i32 64}
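
; A quick note on the !prof metadata used throughout this file: the two
; branch_weights values are relative, so with !0 above the probability of
; taking the first successor is roughly 4 / (4 + 64) = ~5.9%, and the second
; successor gets the remaining ~94.1%. A minimal sketch of how the weights
; attach to a branch, kept in comments so it does not affect the checked
; output (the function and label names are hypothetical):
;
;   define void @weights_example(i1 %c) {
;   entry:
;     ; ~5.9% to %cold, ~94.1% to %hot with !0 as defined above.
;     br i1 %c, label %cold, label %hot, !prof !0
;   cold:
;     ret void
;   hot:
;     ret void
;   }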

define i32 @test_loop_early_exits(i32 %i, i32* %a) {
; Check that we sink early exit blocks out of loop bodies.
; CHECK-LABEL: test_loop_early_exits:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %body4
; CHECK: %exit
; CHECK: %bail1
; CHECK: %bail2
; CHECK: %bail3

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
  %bailcond1 = icmp eq i32 %base, 42
  br i1 %bailcond1, label %bail1, label %body2

bail1:
  ret i32 -1

body2:
  %bailcond2 = icmp eq i32 %base, 43
  br i1 %bailcond2, label %bail2, label %body3

bail2:
  ret i32 -2

body3:
  %bailcond3 = icmp eq i32 %base, 44
  br i1 %bailcond3, label %bail3, label %body4

bail3:
  ret i32 -3

body4:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

; Tail duplication during layout can entirely remove body0 by duplicating it
; into the entry block and into body1. This is a good thing, but it isn't what
; this test is looking for. So, to make the blocks long enough that they don't
; get duplicated, we add some calls to @dummy.
declare void @dummy()

define i32 @test_loop_rotate(i32 %i, i32* %a) {
; Check that we rotate conditional exits from the loop to the bottom of the
; loop, eliminating unconditional branches to the top.
; CHECK-LABEL: test_loop_rotate:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body0
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  call void @dummy()
  call void @dummy()
  br i1 %exitcond, label %exit, label %body1

body1:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br label %body0

exit:
  ret i32 %base
}

define i32 @test_no_loop_rotate(i32 %i, i32* %a) {
; Check that we don't try to rotate a loop which is already laid out with
; fallthrough opportunities into the top and out of the bottom.
; CHECK-LABEL: test_no_loop_rotate:
; CHECK: %entry
; CHECK: %body0
; CHECK: %body1
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br i1 %bailcond1, label %exit, label %body1

body1:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body0

exit:
  ret i32 %base
}
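
; To make the rotation tests above concrete: rotating a loop turns a layout
; that needs an unconditional backedge jump to a top-of-loop test into one
; where the test falls out of the bottom. A rough sketch, comments only and
; with hypothetical block names:
;
;   Before rotation:               After rotation:
;     header: test, jcc exit         body:   work
;     body:   work, jmp header       header: test, jcc body (falls into exit)
;
; This is why test_loop_rotate expects %body1 to be laid out before %body0,
; while test_no_loop_rotate keeps the original order.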

define i32 @test_loop_align(i32 %i, i32* %a) {
; Check that we provide basic loop body alignment with the block placement
; pass.
; CHECK-LABEL: test_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN:[0-9]+]],
; CHECK-NEXT: %body
; CHECK: %exit

entry:
  br label %body

body:
  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body

exit:
  ret i32 %sum
}

define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
; Check that we provide nested loop body alignment.
; CHECK-LABEL: test_nested_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %loop.body.1
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %inner.loop.body
; CHECK-NOT: .p2align
; CHECK: %exit

entry:
  br label %loop.body.1

loop.body.1:
  %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %bidx = load i32, i32* %arrayidx
  br label %inner.loop.body

inner.loop.body:
  %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
  %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
  %scaled_idx = mul i32 %bidx, %iv
  %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx
  %0 = load i32, i32* %inner.arrayidx
  %sum = add nsw i32 %0, %base
  %inner.next = add i32 %iv, 1
  %inner.exitcond = icmp eq i32 %inner.next, %i
  br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body

loop.body.2:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %loop.body.1

exit:
  ret i32 %sum
}

define void @unnatural_cfg1() {
; Test that we can handle a loop with an inner unnatural loop at the end of
; a function. This is a gross CFG reduced from single-source GCC.
; CHECK-LABEL: unnatural_cfg1
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body2
; CHECK: %loop.body3

entry:
  br label %loop.header

loop.header:
  br label %loop.body1

loop.body1:
  br i1 undef, label %loop.body3, label %loop.body2

loop.body2:
  %ptr = load i32*, i32** undef, align 4
  br label %loop.body3

loop.body3:
  %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
  %bcmyptr = bitcast i32* %myptr to i32*
  %val = load i32, i32* %bcmyptr, align 4
  %comp = icmp eq i32 %val, 48
  br i1 %comp, label %loop.body4, label %loop.body5

loop.body4:
  br i1 undef, label %loop.header, label %loop.body5

loop.body5:
  %ptr2 = load i32*, i32** undef, align 4
  br label %loop.body3
}
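
; A note on the alignment checks in the two loop-alignment tests above:
; '.p2align N' aligns to 2^N bytes, and the exact exponent depends on the
; target's preferred loop alignment. The tests therefore capture it once with
; the FileCheck pattern variable [[ALIGN:[0-9]+]] at its first use, and every
; later occurrence of [[ALIGN]] must match that same captured exponent rather
; than a hard-coded value.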

define void @unnatural_cfg2() {
; Test that we can handle a loop with a nested natural loop *and* an unnatural
; loop. This was reduced from a crash on block placement when run over
; single-source GCC.
; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
; CHECK: %loop.inner2.begin
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; CHECK: %bail

entry:
  br label %loop.header

loop.header:
  %comp0 = icmp eq i32* undef, null
  br i1 %comp0, label %bail, label %loop.body1

loop.body1:
  %val0 = load i32*, i32** undef, align 4
  br i1 undef, label %loop.body2, label %loop.inner1.begin

loop.body2:
  br i1 undef, label %loop.body4, label %loop.body3

loop.body3:
  %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0
  %castptr1 = bitcast i32* %ptr1 to i32**
  %val1 = load i32*, i32** %castptr1, align 4
  br label %loop.inner1.begin

loop.inner1.begin:
  %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
  %castval = bitcast i32* %valphi to i32*
  %comp1 = icmp eq i32 undef, 48
  br i1 %comp1, label %loop.inner1.end, label %loop.body4

loop.inner1.end:
  %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0
  %castptr2 = bitcast i32* %ptr2 to i32**
  %val2 = load i32*, i32** %castptr2, align 4
  br label %loop.inner1.begin

loop.body4.dead:
  br label %loop.body4

loop.body4:
  %comp2 = icmp ult i32 undef, 3
  br i1 %comp2, label %loop.inner2.begin, label %loop.end

loop.inner2.begin:
  br i1 false, label %loop.end, label %loop.inner2.end

loop.inner2.end:
  %comp3 = icmp eq i32 undef, 1769472
  br i1 %comp3, label %loop.end, label %loop.inner2.begin

loop.end:
  br label %loop.header

bail:
  unreachable
}

define i32 @problematic_switch() {
; This function's CFG caused overflow in the machine branch probability
; calculation, triggering asserts. Make sure we don't crash on it.
; CHECK: problematic_switch

entry:
  switch i32 undef, label %exit [
    i32 879, label %bogus
    i32 877, label %step
    i32 876, label %step
    i32 875, label %step
    i32 874, label %step
    i32 873, label %step
    i32 872, label %step
    i32 868, label %step
    i32 867, label %step
    i32 866, label %step
    i32 861, label %step
    i32 860, label %step
    i32 856, label %step
    i32 855, label %step
    i32 854, label %step
    i32 831, label %step
    i32 830, label %step
    i32 829, label %step
    i32 828, label %step
    i32 815, label %step
    i32 814, label %step
    i32 811, label %step
    i32 806, label %step
    i32 805, label %step
    i32 804, label %step
    i32 803, label %step
    i32 802, label %step
    i32 801, label %step
    i32 800, label %step
    i32 799, label %step
    i32 798, label %step
    i32 797, label %step
    i32 796, label %step
    i32 795, label %step
  ]
bogus:
  unreachable
step:
  br label %exit
exit:
  %merge = phi i32 [ 3, %step ], [ 6, %entry ]
  ret i32 %merge
}
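
; Background for the next test: on x86, a 'une' (unordered-or-not-equal)
; floating point comparison cannot be expressed as a single conditional jump,
; because ucomisd reports an unordered result through the parity flag. The
; branch therefore lowers to a pair of jumps, roughly:
;
;   ucomisd %xmm1, %xmm0
;   jne   .LBB_taken        ; taken when not equal
;   jp    .LBB_taken        ; taken when unordered (NaN operand)
;
; which is what once made such branches unanalyzable, and is why the checks
; below look for both 'jne' and 'jnp'. Sketch only; the label name is
; hypothetical.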

define void @fpcmp_unanalyzable_branch(i1 %cond) {
; This function's CFG contains a once-unanalyzable branch (une on floating
; point). Now that it is analyzable, we should get the best layout, in which
; each edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'
; is a fallthrough.
; CHECK-LABEL: fpcmp_unanalyzable_branch:
; CHECK: # %bb.0: # %entry
; CHECK: # %bb.1: # %entry.if.then_crit_edge
; CHECK: .LBB10_5: # %if.then
; CHECK: .LBB10_6: # %if.end
; CHECK: # %bb.3: # %exit
; CHECK: jne .LBB10_4
; CHECK-NEXT: jnp .LBB10_6
; CHECK: jmp .LBB10_5

entry:
; Note that this branch must be strongly biased toward
; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
  br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1

entry.if.then_crit_edge:
  %.pre14 = load i8, i8* undef, align 1
  br label %if.then

lor.lhs.false:
  br i1 undef, label %if.end, label %exit

exit:
  %cmp.i = fcmp une double 0.000000e+00, undef
  br i1 %cmp.i, label %if.then, label %if.end, !prof !3

if.then:
  %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
  %1 = and i8 %0, 1
  store i8 %1, i8* undef, align 4
  br label %if.end

if.end:
  ret void
}

!1 = !{!"branch_weights", i32 1000, i32 1}
!3 = !{!"branch_weights", i32 1, i32 1000}

declare i32 @f()
declare i32 @g()
declare i32 @h(i32 %x)

define i32 @test_global_cfg_break_profitability() {
; Check that our metrics for the profitability of a CFG break are global rather
; than local. A successor may be very hot, but if the current block isn't, it
; doesn't matter. Within this test the 'then' block is slightly warmer than the
; 'else' block, but not nearly enough to merit merging it with the exit block
; even though the probability of 'then' branching to the 'exit' block is very
; high.
; CHECK: test_global_cfg_break_profitability
; CHECK: calll {{_?}}f
; CHECK: calll {{_?}}g
; CHECK: calll {{_?}}h
; CHECK: ret

entry:
  br i1 undef, label %then, label %else, !prof !2

then:
  %then.result = call i32 @f()
  br label %exit

else:
  %else.result = call i32 @g()
  br label %exit

exit:
  %result = phi i32 [ %then.result, %then ], [ %else.result, %else ]
  %result2 = call i32 @h(i32 %result)
  ret i32 %result
}

!2 = !{!"branch_weights", i32 3, i32 1}

declare i32 @__gxx_personality_v0(...)

define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; Sometimes the landing pad ends up as the first successor of an invoke block.
; When this happens, a strange result used to fall out of updateTerminators: we
; didn't correctly locate the fallthrough successor, assuming blindly that the
; first one was the fallthrough successor. As a result, we would add an
; erroneous jump to the landing pad thinking *that* was the default successor.
; CHECK-LABEL: test_eh_lpad_successor
; CHECK: %entry
; CHECK-NOT: jmp
; CHECK: %loop

entry:
  invoke i32 @f() to label %preheader unwind label %lpad

preheader:
  br label %loop

lpad:
  %lpad.val = landingpad { i8*, i32 }
          cleanup
  resume { i8*, i32 } %lpad.val

loop:
  br label %loop
}

declare void @fake_throw() noreturn

define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; For blocks containing a 'throw' (or similar functionality), we have a
; no-return invoke. In this case, only EH successors will exist, and
; fallthrough simply won't occur. Make sure we don't crash trying to update
; terminators for such constructs.
;
; CHECK-LABEL: test_eh_throw
; CHECK: %entry
; CHECK: %cleanup

entry:
  invoke void @fake_throw() to label %continue unwind label %cleanup

continue:
  unreachable

cleanup:
  %0 = landingpad { i8*, i32 }
          cleanup
  unreachable
}

define void @test_unnatural_cfg_backwards_inner_loop() {
; Test that when we encounter an unnatural CFG structure after having formed
; a chain for an inner loop which happened to be laid out backwards we don't
; attempt to merge onto the wrong end of the inner loop just because we find it
; first. This was reduced from a crasher in GCC's single source.
;
; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
; CHECK: %loop2b
; CHECK: %loop3

entry:
  br i1 undef, label %loop2a, label %body

body:
  br label %loop2a

loop1:
  %next.load = load i32*, i32** undef
  br i1 %comp.a, label %loop2a, label %loop2b

loop2a:
  %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
  %comp.a = icmp eq i32* %var, null
  br label %loop3

loop2b:
  %gep = getelementptr inbounds i32, i32* %var.phi, i32 0
  %next.ptr = bitcast i32* %gep to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop3

loop3:
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  br label %loop1
}

define void @unanalyzable_branch_to_loop_header() {
; Ensure that we can handle unanalyzable branches into loop headers. We
; pre-form chains for unanalyzable branches, and will find the tail end of that
; at the start of the loop. This function uses floating point comparison
; fallthrough because that happens to always produce unanalyzable branches on
; x86.
;
; CHECK-LABEL: unanalyzable_branch_to_loop_header
; CHECK: %entry
; CHECK: %loop
; CHECK: %exit

entry:
  %cmp = fcmp une double 0.000000e+00, undef
  br i1 %cmp, label %loop, label %exit

loop:
  %cond = icmp eq i8 undef, 42
  br i1 %cond, label %exit, label %loop

exit:
  ret void
}

define void @unanalyzable_branch_to_best_succ(i1 %cond) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the optimal successor to merge.
;
; This branch is now analyzable and hence the destination block becomes the
; hotter one. The right order is entry->bar->exit->foo.
;
; CHECK-LABEL: unanalyzable_branch_to_best_succ
; CHECK: %entry
; CHECK: %bar
; CHECK: %exit
; CHECK: %foo

entry:
; Bias this branch toward bar to ensure we form that chain.
  br i1 %cond, label %bar, label %foo, !prof !1

foo:
  %cmp = fcmp une double 0.000000e+00, undef
  br i1 %cmp, label %bar, label %exit

bar:
  call i32 @f()
  br label %exit

exit:
  ret void
}

define void @unanalyzable_branch_to_free_block(float %x) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the best free block in the CFG.
;
; CHECK-LABEL: unanalyzable_branch_to_free_block
; CHECK: %entry
; CHECK: %a
; CHECK: %b
; CHECK: %c
; CHECK: %exit

entry:
  br i1 undef, label %a, label %b

a:
  call i32 @f()
  br label %c

b:
  %cmp = fcmp une float %x, undef
  br i1 %cmp, label %c, label %exit

c:
  call i32 @g()
  br label %exit

exit:
  ret void
}

define void @many_unanalyzable_branches() {
; Ensure that we don't crash as we're building up many unanalyzable branches,
; blocks, and loops.
;
; CHECK-LABEL: many_unanalyzable_branches
; CHECK: %entry
; CHECK: %exit

entry:
  br label %0

  %val0 = load volatile float, float* undef
  %cmp0 = fcmp une float %val0, undef
  br i1 %cmp0, label %1, label %0
  %val1 = load volatile float, float* undef
  %cmp1 = fcmp une float %val1, undef
  br i1 %cmp1, label %2, label %1
  %val2 = load volatile float, float* undef
  %cmp2 = fcmp une float %val2, undef
  br i1 %cmp2, label %3, label %2
  %val3 = load volatile float, float* undef
  %cmp3 = fcmp une float %val3, undef
  br i1 %cmp3, label %4, label %3
  %val4 = load volatile float, float* undef
  %cmp4 = fcmp une float %val4, undef
  br i1 %cmp4, label %5, label %4
  %val5 = load volatile float, float* undef
  %cmp5 = fcmp une float %val5, undef
  br i1 %cmp5, label %6, label %5
  %val6 = load volatile float, float* undef
  %cmp6 = fcmp une float %val6, undef
  br i1 %cmp6, label %7, label %6
  %val7 = load volatile float, float* undef
  %cmp7 = fcmp une float %val7, undef
  br i1 %cmp7, label %8, label %7
  %val8 = load volatile float, float* undef
  %cmp8 = fcmp une float %val8, undef
  br i1 %cmp8, label %9, label %8
  %val9 = load volatile float, float* undef
  %cmp9 = fcmp une float %val9, undef
  br i1 %cmp9, label %10, label %9
  %val10 = load volatile float, float* undef
  %cmp10 = fcmp une float %val10, undef
  br i1 %cmp10, label %11, label %10
  %val11 = load volatile float, float* undef
  %cmp11 = fcmp une float %val11, undef
  br i1 %cmp11, label %12, label %11
  %val12 = load volatile float, float* undef
  %cmp12 = fcmp une float %val12, undef
  br i1 %cmp12, label %13, label %12
  %val13 = load volatile float, float* undef
  %cmp13 = fcmp une float %val13, undef
  br i1 %cmp13, label %14, label %13
  %val14 = load volatile float, float* undef
  %cmp14 = fcmp une float %val14, undef
  br i1 %cmp14, label %15, label %14
  %val15 = load volatile float, float* undef
  %cmp15 = fcmp une float %val15, undef
  br i1 %cmp15, label %16, label %15
  %val16 = load volatile float, float* undef
  %cmp16 = fcmp une float %val16, undef
  br i1 %cmp16, label %17, label %16
  %val17 = load volatile float, float* undef
  %cmp17 = fcmp une float %val17, undef
  br i1 %cmp17, label %18, label %17
  %val18 = load volatile float, float* undef
  %cmp18 = fcmp une float %val18, undef
  br i1 %cmp18, label %19, label %18
  %val19 = load volatile float, float* undef
  %cmp19 = fcmp une float %val19, undef
  br i1 %cmp19, label %20, label %19
  %val20 = load volatile float, float* undef
  %cmp20 = fcmp une float %val20, undef
  br i1 %cmp20, label %21, label %20
  %val21 = load volatile float, float* undef
  %cmp21 = fcmp une float %val21, undef
  br i1 %cmp21, label %22, label %21
  %val22 = load volatile float, float* undef
  %cmp22 = fcmp une float %val22, undef
  br i1 %cmp22, label %23, label %22
  %val23 = load volatile float, float* undef
  %cmp23 = fcmp une float %val23, undef
  br i1 %cmp23, label %24, label %23
  %val24 = load volatile float, float* undef
  %cmp24 = fcmp une float %val24, undef
  br i1 %cmp24, label %25, label %24
  %val25 = load volatile float, float* undef
  %cmp25 = fcmp une float %val25, undef
  br i1 %cmp25, label %26, label %25
  %val26 = load volatile float, float* undef
  %cmp26 = fcmp une float %val26, undef
  br i1 %cmp26, label %27, label %26
  %val27 = load volatile float, float* undef
  %cmp27 = fcmp une float %val27, undef
  br i1 %cmp27, label %28, label %27
  %val28 = load volatile float, float* undef
  %cmp28 = fcmp une float %val28, undef
  br i1 %cmp28, label %29, label %28
  %val29 = load volatile float, float* undef
  %cmp29 = fcmp une float %val29, undef
  br i1 %cmp29, label %30, label %29
  %val30 = load volatile float, float* undef
  %cmp30 = fcmp une float %val30, undef
  br i1 %cmp30, label %31, label %30
  %val31 = load volatile float, float* undef
  %cmp31 = fcmp une float %val31, undef
  br i1 %cmp31, label %32, label %31
  %val32 = load volatile float, float* undef
  %cmp32 = fcmp une float %val32, undef
  br i1 %cmp32, label %33, label %32
  %val33 = load volatile float, float* undef
  %cmp33 = fcmp une float %val33, undef
  br i1 %cmp33, label %34, label %33
  %val34 = load volatile float, float* undef
  %cmp34 = fcmp une float %val34, undef
  br i1 %cmp34, label %35, label %34
  %val35 = load volatile float, float* undef
  %cmp35 = fcmp une float %val35, undef
  br i1 %cmp35, label %36, label %35
  %val36 = load volatile float, float* undef
  %cmp36 = fcmp une float %val36, undef
  br i1 %cmp36, label %37, label %36
  %val37 = load volatile float, float* undef
  %cmp37 = fcmp une float %val37, undef
  br i1 %cmp37, label %38, label %37
  %val38 = load volatile float, float* undef
  %cmp38 = fcmp une float %val38, undef
  br i1 %cmp38, label %39, label %38
  %val39 = load volatile float, float* undef
  %cmp39 = fcmp une float %val39, undef
  br i1 %cmp39, label %40, label %39
  %val40 = load volatile float, float* undef
  %cmp40 = fcmp une float %val40, undef
  br i1 %cmp40, label %41, label %40
  %val41 = load volatile float, float* undef
  %cmp41 = fcmp une float %val41, undef
  br i1 %cmp41, label %42, label %41
  %val42 = load volatile float, float* undef
  %cmp42 = fcmp une float %val42, undef
  br i1 %cmp42, label %43, label %42
  %val43 = load volatile float, float* undef
  %cmp43 = fcmp une float %val43, undef
  br i1 %cmp43, label %44, label %43
  %val44 = load volatile float, float* undef
  %cmp44 = fcmp une float %val44, undef
  br i1 %cmp44, label %45, label %44
  %val45 = load volatile float, float* undef
  %cmp45 = fcmp une float %val45, undef
  br i1 %cmp45, label %46, label %45
  %val46 = load volatile float, float* undef
  %cmp46 = fcmp une float %val46, undef
  br i1 %cmp46, label %47, label %46
  %val47 = load volatile float, float* undef
  %cmp47 = fcmp une float %val47, undef
  br i1 %cmp47, label %48, label %47
  %val48 = load volatile float, float* undef
  %cmp48 = fcmp une float %val48, undef
  br i1 %cmp48, label %49, label %48
  %val49 = load volatile float, float* undef
  %cmp49 = fcmp une float %val49, undef
  br i1 %cmp49, label %50, label %49
  %val50 = load volatile float, float* undef
  %cmp50 = fcmp une float %val50, undef
  br i1 %cmp50, label %51, label %50
  %val51 = load volatile float, float* undef
  %cmp51 = fcmp une float %val51, undef
  br i1 %cmp51, label %52, label %51
  %val52 = load volatile float, float* undef
  %cmp52 = fcmp une float %val52, undef
  br i1 %cmp52, label %53, label %52
  %val53 = load volatile float, float* undef
  %cmp53 = fcmp une float %val53, undef
  br i1 %cmp53, label %54, label %53
  %val54 = load volatile float, float* undef
  %cmp54 = fcmp une float %val54, undef
  br i1 %cmp54, label %55, label %54
  %val55 = load volatile float, float* undef
  %cmp55 = fcmp une float %val55, undef
  br i1 %cmp55, label %56, label %55
  %val56 = load volatile float, float* undef
  %cmp56 = fcmp une float %val56, undef
  br i1 %cmp56, label %57, label %56
  %val57 = load volatile float, float* undef
  %cmp57 = fcmp une float %val57, undef
  br i1 %cmp57, label %58, label %57
  %val58 = load volatile float, float* undef
  %cmp58 = fcmp une float %val58, undef
  br i1 %cmp58, label %59, label %58
  %val59 = load volatile float, float* undef
  %cmp59 = fcmp une float %val59, undef
  br i1 %cmp59, label %60, label %59
  %val60 = load volatile float, float* undef
  %cmp60 = fcmp une float %val60, undef
  br i1 %cmp60, label %61, label %60
  %val61 = load volatile float, float* undef
  %cmp61 = fcmp une float %val61, undef
  br i1 %cmp61, label %62, label %61
  %val62 = load volatile float, float* undef
  %cmp62 = fcmp une float %val62, undef
  br i1 %cmp62, label %63, label %62
  %val63 = load volatile float, float* undef
  %cmp63 = fcmp une float %val63, undef
  br i1 %cmp63, label %64, label %63
  %val64 = load volatile float, float* undef
  %cmp64 = fcmp une float %val64, undef
  br i1 %cmp64, label %65, label %64

  br label %exit
exit:
  ret void
}
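
; A note on the function above: it deliberately leaves its basic blocks
; unnamed. In textual LLVM IR, an instruction that follows a terminator starts
; a new unnamed block, and unnamed blocks share the sequential %N numbering
; with unnamed values, which is why the branches can target labels like %0 and
; %1 without any explicit label markers.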

define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
; This test case comes from the heapsort benchmark, and exemplifies several
; important aspects of block placement in the presence of loops:
; 1) Loop rotation needs to *ensure* that the desired exiting edge can be
;    a fallthrough.
; 2) The exiting edge from the loop which is rotated to be laid out at the
;    bottom of the loop needs to be exiting into the nearest enclosing loop (to
;    which there is an exit). Otherwise, we force that enclosing loop into
;    strange layouts that are significantly less efficient, often making it
;    discontiguous.
;
; CHECK-LABEL: @benchmark_heapsort
; CHECK: %entry
; First rotated loop top.
; CHECK: .p2align
; CHECK: %while.end
; %for.cond gets completely tail-duplicated away.
; CHECK: %if.then
; CHECK: %if.else
; CHECK: %if.end10
; Second rotated loop top.
; CHECK: .p2align
; CHECK: %if.then24
; CHECK: %while.cond.outer
; Third rotated loop top.
; CHECK: .p2align
; CHECK: %while.cond
; CHECK: %while.body
; CHECK: %land.lhs.true
; CHECK: %if.then19
; CHECK: %if.end20
; CHECK: %if.then8
; CHECK: ret

entry:
  %shr = ashr i32 %n, 1
  %add = add nsw i32 %shr, 1
  %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1
  br label %for.cond

for.cond:
  %ir.0 = phi i32 [ %n, %entry ], [ %ir.1, %while.end ]
  %l.0 = phi i32 [ %add, %entry ], [ %l.1, %while.end ]
  %cmp = icmp sgt i32 %l.0, 1
  br i1 %cmp, label %if.then, label %if.else

if.then:
  %dec = add nsw i32 %l.0, -1
  %idxprom = sext i32 %dec to i64
  %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom
  %0 = load double, double* %arrayidx, align 8
  br label %if.end10

if.else:
  %idxprom1 = sext i32 %ir.0 to i64
  %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1
  %1 = load double, double* %arrayidx2, align 8
  %2 = load double, double* %arrayidx3, align 8
  store double %2, double* %arrayidx2, align 8
  %dec6 = add nsw i32 %ir.0, -1
  %cmp7 = icmp eq i32 %dec6, 1
  br i1 %cmp7, label %if.then8, label %if.end10

if.then8:
  store double %1, double* %arrayidx3, align 8
  ret void

if.end10:
  %ir.1 = phi i32 [ %ir.0, %if.then ], [ %dec6, %if.else ]
  %l.1 = phi i32 [ %dec, %if.then ], [ %l.0, %if.else ]
  %rra.0 = phi double [ %0, %if.then ], [ %1, %if.else ]
  %add31 = add nsw i32 %ir.1, 1
  br label %while.cond.outer

while.cond.outer:
  %j.0.ph.in = phi i32 [ %l.1, %if.end10 ], [ %j.1, %if.then24 ]
  %j.0.ph = shl i32 %j.0.ph.in, 1
  br label %while.cond

while.cond:
  %j.0 = phi i32 [ %add31, %if.end20 ], [ %j.0.ph, %while.cond.outer ]
  %cmp11 = icmp sgt i32 %j.0, %ir.1
  br i1 %cmp11, label %while.end, label %while.body

while.body:
  %cmp12 = icmp slt i32 %j.0, %ir.1
  br i1 %cmp12, label %land.lhs.true, label %if.end20

land.lhs.true:
  %idxprom13 = sext i32 %j.0 to i64
  %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13
  %3 = load double, double* %arrayidx14, align 8
  %add15 = add nsw i32 %j.0, 1
  %idxprom16 = sext i32 %add15 to i64
  %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16
  %4 = load double, double* %arrayidx17, align 8
  %cmp18 = fcmp olt double %3, %4
  br i1 %cmp18, label %if.then19, label %if.end20

if.then19:
  br label %if.end20

if.end20:
  %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
  %idxprom21 = sext i32 %j.1 to i64
  %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21
  %5 = load double, double* %arrayidx22, align 8
  %cmp23 = fcmp olt double %rra.0, %5
  br i1 %cmp23, label %if.then24, label %while.cond

if.then24:
  %idxprom27 = sext i32 %j.0.ph.in to i64
  %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27
  store double %5, double* %arrayidx28, align 8
  br label %while.cond.outer

while.end:
  %idxprom33 = sext i32 %j.0.ph.in to i64
  %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33
  store double %rra.0, double* %arrayidx34, align 8
  br label %for.cond
}
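
; Since the checks above rely on %for.cond being tail-duplicated away, here is
; the idea in miniature: a small block that only merges control flow and
; branches again can be copied into each of its predecessors, removing the
; jump into it. A hedged sketch, comments only, with hypothetical names:
;
;   pred1: ...; br label %merge            pred1: ...; br i1 %c, label %a, label %b
;   pred2: ...; br label %merge     =>     pred2: ...; br i1 %c, label %a, label %b
;   merge: br i1 %c, label %a, label %b    (block %merge no longer exists)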

declare void @cold_function() cold

define i32 @test_cold_calls(i32* %a) {
; Test that edges to blocks post-dominated by cold calls are
; marked as not expected to be taken. They should be laid out
; at the bottom.
; CHECK-LABEL: test_cold_calls:
; CHECK: %entry
; CHECK: %else
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else

then:
  call void @cold_function()
  br label %exit

else:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  br label %exit

exit:
  %ret = phi i32 [ %val1, %then ], [ %val2, %else ]
  ret i32 %ret
}

; Make sure we put landingpads out of the way.
declare i32 @pers(...)

declare i32 @foo()

declare i32 @bar()

define i32 @test_lp(i32 %a) personality i32 (...)* @pers {
; CHECK-LABEL: test_lp:
; CHECK: %entry
; CHECK: %hot
; CHECK: %then
; CHECK: %cold
; CHECK: %coldlp
; CHECK: %hotlp
; CHECK: %lpret
entry:
  %0 = icmp sgt i32 %a, 1
  br i1 %0, label %hot, label %cold, !prof !4

hot:
  %1 = invoke i32 @foo()
          to label %then unwind label %hotlp

cold:
  %2 = invoke i32 @bar()
          to label %then unwind label %coldlp

then:
  %3 = phi i32 [ %1, %hot ], [ %2, %cold ]
  ret i32 %3

hotlp:
  %4 = landingpad { i8*, i32 }
          cleanup
  br label %lpret

coldlp:
  %5 = landingpad { i8*, i32 }
          cleanup
  br label %lpret

lpret:
  %6 = phi i32 [ -1, %hotlp ], [ -2, %coldlp ]
  %7 = add i32 %6, 42
  ret i32 %7
}

!4 = !{!"branch_weights", i32 65536, i32 0}

; Make sure that EH pads are scheduled from the least probable one
; to the most probable one. See selectBestCandidateBlock as to why.
declare void @clean()

define void @test_flow_unwind() personality i32 (...)* @pers {
; CHECK-LABEL: test_flow_unwind:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %innerlp
; CHECK: %outerlp
; CHECK: %outercleanup
entry:
  %0 = invoke i32 @foo()
          to label %then unwind label %outerlp

then:
  %1 = invoke i32 @bar()
          to label %exit unwind label %innerlp

exit:
  ret void

innerlp:
  %2 = landingpad { i8*, i32 }
          cleanup
  br label %innercleanup

outerlp:
  %3 = landingpad { i8*, i32 }
          cleanup
  br label %outercleanup

outercleanup:
  %4 = phi { i8*, i32 } [ %2, %innercleanup ], [ %3, %outerlp ]
  call void @clean()
  resume { i8*, i32 } %4

innercleanup:
  call void @clean()
  br label %outercleanup
}

declare void @hot_function()
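
; The next few tests exercise the probability threshold at which block
; placement is willing to break the topological (CFG) order. As a worked
; example with the weight metadata defined at the end of this file: !5 is
; 84:16, so the hot edge has probability 84 / (84 + 16) = 84%, just above the
; 80% threshold the comments refer to, while !7 is 60:40, i.e. only 60%.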

define void @test_hot_branch(i32* %a) {
; Test that a hot branch that has a probability a little larger than 80% will
; break CFG constraints when doing block placement.
; CHECK-LABEL: test_hot_branch:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 50% will
; break CFG constraints when doing block placement when profile is available.
; CHECK-LABEL: test_hot_branch_profile:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !7

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 80% will
; break triangle-shaped CFG constraints when doing block placement if profile
; is present.
; CHECK-LABEL: test_hot_branch_triangle_profile:
; CHECK: %entry
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !5

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}
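
; For the triangle-shaped tests, a quick sanity check on the numbers: with !5
; (84:16) the branch around %then is hot enough (84% > 80%) that %exit is
; placed directly after %entry, at the cost of an extra jump into %then. With
; !7 (60:40) the hot edge is only 60%, which falls in the 50%-66% range where,
; as the next test shows, keeping the topological order is the better trade.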

define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 {
; Test that a hot branch that has a probability between 50% and 66% will not
; break triangle-shaped CFG constraints when doing block placement if profile
; is present.
; CHECK-LABEL: test_hot_branch_triangle_profile_topology:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !7

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

declare void @a()
declare void @b()

define void @test_forked_hot_diamond(i32* %a) {
; Test that a hot branch with probability > 80% followed by a 50/50 branch
; will not place the cold predecessor if the probability for the fallthrough
; remains above 80%.
; CHECK-LABEL: test_forked_hot_diamond
; CHECK: %entry
; CHECK: %then
; CHECK: %fork1
; CHECK: %else
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %fork1, label %fork2, !prof !8

else:
  call void @cold_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}
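
; Worked numbers for the diamond above: with !5 the entry branch is 84:16, and
; both !8 branches send ~50.01% of their weight to %fork1, so the relative
; probability of reaching %fork1 from %then versus from %else is
; (0.84 * 0.5001) / ((0.84 + 0.16) * 0.5001) = 84%, still above the 80%
; threshold; hence the cold %else block is not placed ahead of %fork1.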

define void @test_forked_hot_diamond_gets_cold(i32* %a) {
; Test that a hot branch with probability > 80% followed by a 50/50 branch
; will place the cold predecessor if the probability for the fallthrough
; falls below 80%.
; The probability for both branches is 85%. For then2 vs else1
; this results in a compounded probability of 83%.
; Neither then2->fork1 nor then2->fork2 has a large enough relative
; probability to break the CFG.
; Relative probs:
; then2 -> fork1 vs else1 -> fork1 = 71%
; then2 -> fork2 vs else2 -> fork2 = 74%
; CHECK-LABEL: test_forked_hot_diamond_gets_cold
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %else1
; CHECK: %fork1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !9

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !9

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_stays_hot(i32* %a) {
; Test that a hot branch with probability > 88.88% (1:8) followed by a 50/50
; branch will not place the cold predecessor, as the probability for the
; fallthrough stays above 80%.
; (1:8) followed by (1:1) is still (1:4).
; Here we use 90% probability because two such branches in a row have an 89%
; probability relative to the original branch.
; CHECK-LABEL: test_forked_hot_diamond_stays_hot
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %fork1
; CHECK: %else1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !10

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !10

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}
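
; Spelling out the 89% figure above: P(then1 then then2) = 0.9 * 0.9 = 0.81
; and P(else1) = 0.1, so relative to the original branch the two-deep hot path
; keeps 0.81 / (0.81 + 0.1) = ~89%, comfortably above the 80% threshold, which
; is why the layout keeps entry -> then1 -> then2 -> fork1 as fallthroughs.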

; Because %endif has a higher frequency than %if, the calculations show we
; shouldn't tail-duplicate %endif so that we can place it after %if. We were
; previously undercounting the cost by ignoring execution frequency that didn't
; come from the %if->%endif path.
; CHECK-LABEL: higher_frequency_succ_tail_dup
; CHECK: %entry
; CHECK: %elseif
; CHECK: %else
; CHECK: %endif
; CHECK: %then
; CHECK: %ret
define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) {
entry:
  br label %if
if:                                               ; preds = %entry
  call void @effect(i32 0)
  br i1 %a, label %elseif, label %endif, !prof !11 ; even

elseif:                                           ; preds = %if
  call void @effect(i32 1)
  br i1 %b, label %else, label %endif, !prof !11 ; even

else:                                             ; preds = %elseif
  call void @effect(i32 2)
  br label %endif

endif:                                            ; preds = %if, %elseif, %else
  br i1 %c, label %then, label %ret, !prof !12 ; 5 to 3

then:                                             ; preds = %endif
  call void @effect(i32 3)
  br label %ret

ret:                                              ; preds = %endif, %then
  ret void
}

define i32 @not_rotate_if_extra_branch(i32 %count) {
; Test checks that there is no loop rotation
; if it introduces an extra branch.
; Specifically, in this case the best exit is .header,
; but it has a fallthrough to the .middle block, and the last block in the
; loop chain, .slow, does not have a fallthrough to .header.
; CHECK-LABEL: not_rotate_if_extra_branch
; CHECK: %.entry
; CHECK: %.header
; CHECK: %.middle
; CHECK: %.backedge
; CHECK: %.slow
; CHECK: %.bailout
; CHECK: %.stop
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.header

.header:
  %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
  %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
  %is_exc = icmp sgt i32 %i, 9000000
  br i1 %is_exc, label %.bailout, label %.middle, !prof !13

.bailout:
  %sum.2 = add nsw i32 %count, 1
  br label %.stop

.middle:
  %pr.1 = and i32 %i, 1023
  %pr.2 = icmp eq i32 %pr.1, 0
  br i1 %pr.2, label %.slow, label %.backedge, !prof !14

.slow:
  tail call void @effect(i32 %sum)
  br label %.backedge

.backedge:
  %sum.1 = add nsw i32 %i, %sum
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.header, label %.stop, !prof !15

.stop:
  %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
  ret i32 %sum.phi
}

define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
; This is a regression test for the patch that avoids loop rotation if
; it introduces an extra branch.
; CHECK-LABEL: not_rotate_if_extra_branch_regression
; CHECK: %.entry
; CHECK: %.first_backedge
; CHECK: %.slow
; CHECK: %.second_header
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.first_header

.first_header:
  %i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ]
  %is_bo1 = icmp sgt i32 %i, 9000000
  br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14

.first_backedge:
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.first_header, label %.second_header, !prof !13

.second_header:
  %j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ]
  %end.2 = icmp sgt i32 %j, %count
  br i1 %end.2, label %.stop, label %.second_middle, !prof !14

.second_middle:
  %is_slow = icmp sgt i32 %j, 9000000
  br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14

.slow:
  tail call void @effect(i32 %j)
  br label %.second_backedge

.second_backedge:
  %j.1 = add nsw i32 %j, 1
  %end.3 = icmp slt i32 %j, 10000000
  br i1 %end.3, label %.second_header, label %.stop, !prof !13

.stop:
  %res = add nsw i32 %j, %i.1
  ret i32 %res

.bailout:
  ret i32 0
}

declare void @effect(i32)

!5 = !{!"branch_weights", i32 84, i32 16}
!6 = !{!"function_entry_count", i32 10}
!7 = !{!"branch_weights", i32 60, i32 40}
!8 = !{!"branch_weights", i32 5001, i32 4999}
!9 = !{!"branch_weights", i32 85, i32 15}
!10 = !{!"branch_weights", i32 90, i32 10}
!11 = !{!"branch_weights", i32 1, i32 1}
!12 = !{!"branch_weights", i32 5, i32 3}
!13 = !{!"branch_weights", i32 1, i32 1}
!14 = !{!"branch_weights", i32 1, i32 1023}
!15 = !{!"branch_weights", i32 4095, i32 1}
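
; For reference, the approximate first-successor probabilities encoded by the
; weight metadata above: !5 = 84%, !7 = 60%, !8 = ~50%, !9 = 85%, !10 = 90%,
; !11 and !13 = 50%, !12 = 62.5%, !14 = ~0.1% (1:1023), !15 = ~99.98%
; (4095:1). !6 is a function entry count, not a pair of branch weights.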