; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s
; RUN: opt -disable-output -debugify < %s

declare void @error(i32 %i, i32 %a, i32 %b)

define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; Test a chain of ifs, where the block guarded by the if is error handling code
; that is not expected to run.
; CHECK-LABEL: test_ifchains:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %else1
; CHECK-NOT: .p2align
; CHECK: %else2
; CHECK-NOT: .p2align
; CHECK: %else3
; CHECK-NOT: .p2align
; CHECK: %else4
; CHECK-NOT: .p2align
; CHECK: %exit
; CHECK: %then1
; CHECK: %then2
; CHECK: %then3
; CHECK: %then4
; CHECK: %then5

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !0

then1:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else1

else1:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !0

then2:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else2

else2:
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %then3, label %else3, !prof !0

then3:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else3

else3:
  %gep4 = getelementptr i32, i32* %a, i32 4
  %val4 = load i32, i32* %gep4
  %cond4 = icmp ugt i32 %val4, 4
  br i1 %cond4, label %then4, label %else4, !prof !0

then4:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %else4

else4:
  %gep5 = getelementptr i32, i32* %a, i32 3
  %val5 = load i32, i32* %gep5
  %cond5 = icmp ugt i32 %val5, 3
  br i1 %cond5, label %then5, label %exit, !prof !0

then5:
  call void @error(i32 %i, i32 1, i32 %b)
  br label %exit

exit:
  ret i32 %b
}

define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; Check that we sink cold loop blocks after the hot loop body.
; CHECK-LABEL: test_loop_cold_blocks:
; CHECK: %entry
; CHECK-NOT: .p2align
; CHECK: %unlikely1
; CHECK-NOT: .p2align
; CHECK: %unlikely2
; CHECK: .p2align
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %exit

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
  %unlikelycond1 = icmp slt i32 %base, 42
  br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0

unlikely1:
  call void @error(i32 %i, i32 1, i32 %base)
  br label %body2

body2:
  %unlikelycond2 = icmp sgt i32 %base, 21
  br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0

unlikely2:
  call void @error(i32 %i, i32 2, i32 %base)
  br label %body3

body3:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

!0 = !{!"branch_weights", i32 4, i32 64}
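; For reference: with !0 = {4, 64}, the first (taken) edge of each branch
; above gets probability 4 / (4 + 64) ~= 5.9%, so the error-handling blocks
; are treated as cold and sunk out of the fallthrough path.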

define i32 @test_loop_early_exits(i32 %i, i32* %a) {
; Check that we sink early exit blocks out of loop bodies.
; CHECK-LABEL: test_loop_early_exits:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %body4
; CHECK: %exit
; CHECK: %bail1
; CHECK: %bail2
; CHECK: %bail3

entry:
  br label %body1

body1:
  %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
  %bailcond1 = icmp eq i32 %base, 42
  br i1 %bailcond1, label %bail1, label %body2

bail1:
  ret i32 -1

body2:
  %bailcond2 = icmp eq i32 %base, 43
  br i1 %bailcond2, label %bail2, label %body3

bail2:
  ret i32 -2

body3:
  %bailcond3 = icmp eq i32 %base, 44
  br i1 %bailcond3, label %bail3, label %body4

bail3:
  ret i32 -3

body4:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body1

exit:
  ret i32 %sum
}

; Tail duplication during layout can entirely remove body0 by duplicating it
; into the entry block and into body1. That is a good thing, but it isn't what
; this test is looking for. To make the blocks long enough that they don't get
; duplicated, we add some calls to @dummy.
declare void @dummy()

define i32 @test_loop_rotate(i32 %i, i32* %a) {
; Check that we rotate conditional exits from the loop to the bottom of the
; loop, eliminating unconditional branches to the top.
; CHECK-LABEL: test_loop_rotate:
; CHECK: %entry
; CHECK: %body1
; CHECK: %body0
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  call void @dummy()
  call void @dummy()
  br i1 %exitcond, label %exit, label %body1

body1:
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br label %body0

exit:
  ret i32 %base
}

define i32 @test_no_loop_rotate(i32 %i, i32* %a) {
; Check that we don't try to rotate a loop which is already laid out with
; fallthrough opportunities into the top and out of the bottom.
; CHECK-LABEL: test_no_loop_rotate:
; CHECK: %entry
; CHECK: %body0
; CHECK: %body1
; CHECK: %exit

entry:
  br label %body0

body0:
  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %bailcond1 = icmp eq i32 %sum, 42
  br i1 %bailcond1, label %exit, label %body1

body1:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body0

exit:
  ret i32 %base
}

define i32 @test_loop_align(i32 %i, i32* %a) {
; Check that we provide basic loop body alignment with the block placement
; pass.
; CHECK-LABEL: test_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN:[0-9]+]],
; CHECK-NEXT: %body
; CHECK: %exit

entry:
  br label %body

body:
  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %0 = load i32, i32* %arrayidx
  %sum = add nsw i32 %0, %base
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %body

exit:
  ret i32 %sum
}

define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
; Check that we provide nested loop body alignment.
; CHECK-LABEL: test_nested_loop_align:
; CHECK: %entry
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %loop.body.1
; CHECK: .p2align [[ALIGN]],
; CHECK-NEXT: %inner.loop.body
; CHECK-NOT: .p2align
; CHECK: %exit

entry:
  br label %loop.body.1

loop.body.1:
  %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
  %bidx = load i32, i32* %arrayidx
  br label %inner.loop.body

inner.loop.body:
  %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
  %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
  %scaled_idx = mul i32 %bidx, %iv
  %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx
  %0 = load i32, i32* %inner.arrayidx
  %sum = add nsw i32 %0, %base
  %inner.next = add i32 %iv, 1
  %inner.exitcond = icmp eq i32 %inner.next, %i
  br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body

loop.body.2:
  %next = add i32 %iv, 1
  %exitcond = icmp eq i32 %next, %i
  br i1 %exitcond, label %exit, label %loop.body.1

exit:
  ret i32 %sum
}

define void @unnatural_cfg1() {
; Test that we can handle a loop with an inner unnatural loop at the end of
; a function. This is a gross CFG reduced from single-source GCC.
; CHECK-LABEL: unnatural_cfg1
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body2
; CHECK: %loop.body3

entry:
  br label %loop.header

loop.header:
  br label %loop.body1

loop.body1:
  br i1 undef, label %loop.body3, label %loop.body2

loop.body2:
  %ptr = load i32*, i32** undef, align 4
  br label %loop.body3

loop.body3:
  %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
  %bcmyptr = bitcast i32* %myptr to i32*
  %val = load i32, i32* %bcmyptr, align 4
  %comp = icmp eq i32 %val, 48
  br i1 %comp, label %loop.body4, label %loop.body5

loop.body4:
  br i1 undef, label %loop.header, label %loop.body5

loop.body5:
  %ptr2 = load i32*, i32** undef, align 4
  br label %loop.body3
}

define void @unnatural_cfg2() {
; Test that we can handle a loop with a nested natural loop *and* an unnatural
; loop. This was reduced from a crash on block placement when run over
; single-source GCC.
; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry
; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
; CHECK: %loop.inner2.begin
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; CHECK: %bail

entry:
  br label %loop.header

loop.header:
  %comp0 = icmp eq i32* undef, null
  br i1 %comp0, label %bail, label %loop.body1

loop.body1:
  %val0 = load i32*, i32** undef, align 4
  br i1 undef, label %loop.body2, label %loop.inner1.begin

loop.body2:
  br i1 undef, label %loop.body4, label %loop.body3

loop.body3:
  %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0
  %castptr1 = bitcast i32* %ptr1 to i32**
  %val1 = load i32*, i32** %castptr1, align 4
  br label %loop.inner1.begin

loop.inner1.begin:
  %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
  %castval = bitcast i32* %valphi to i32*
  %comp1 = icmp eq i32 undef, 48
  br i1 %comp1, label %loop.inner1.end, label %loop.body4

loop.inner1.end:
  %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0
  %castptr2 = bitcast i32* %ptr2 to i32**
  %val2 = load i32*, i32** %castptr2, align 4
  br label %loop.inner1.begin

loop.body4.dead:
  br label %loop.body4

loop.body4:
  %comp2 = icmp ult i32 undef, 3
  br i1 %comp2, label %loop.inner2.begin, label %loop.end

loop.inner2.begin:
  br i1 false, label %loop.end, label %loop.inner2.end

loop.inner2.end:
  %comp3 = icmp eq i32 undef, 1769472
  br i1 %comp3, label %loop.end, label %loop.inner2.begin

loop.end:
  br label %loop.header

bail:
  unreachable
}

define i32 @problematic_switch() {
; This function's CFG caused overflow in the machine branch probability
; calculation, triggering asserts. Make sure we don't crash on it.
; CHECK: problematic_switch

entry:
  switch i32 undef, label %exit [
    i32 879, label %bogus
    i32 877, label %step
    i32 876, label %step
    i32 875, label %step
    i32 874, label %step
    i32 873, label %step
    i32 872, label %step
    i32 868, label %step
    i32 867, label %step
    i32 866, label %step
    i32 861, label %step
    i32 860, label %step
    i32 856, label %step
    i32 855, label %step
    i32 854, label %step
    i32 831, label %step
    i32 830, label %step
    i32 829, label %step
    i32 828, label %step
    i32 815, label %step
    i32 814, label %step
    i32 811, label %step
    i32 806, label %step
    i32 805, label %step
    i32 804, label %step
    i32 803, label %step
    i32 802, label %step
    i32 801, label %step
    i32 800, label %step
    i32 799, label %step
    i32 798, label %step
    i32 797, label %step
    i32 796, label %step
    i32 795, label %step
  ]
bogus:
  unreachable
step:
  br label %exit
exit:
  %merge = phi i32 [ 3, %step ], [ 6, %entry ]
  ret i32 %merge
}

define void @fpcmp_unanalyzable_branch(i1 %cond) {
; This function's CFG contains a once-unanalyzable branch ('une' on
; floating-point values). Now that it is analyzable, we should get the best
; layout, one in which each edge in 'entry' -> 'entry.if.then_crit_edge' ->
; 'if.then' -> 'if.end' is a fallthrough.
; CHECK-LABEL: fpcmp_unanalyzable_branch:
; CHECK:       # %bb.0: # %entry
; CHECK:       # %bb.1: # %entry.if.then_crit_edge
; CHECK:       .LBB10_5: # %if.then
; CHECK:       .LBB10_6: # %if.end
; CHECK:       # %bb.3: # %exit
; CHECK:       jne .LBB10_4
; CHECK-NEXT:  jnp .LBB10_6
; CHECK:       jmp .LBB10_5

entry:
; Note that this branch must be strongly biased toward
; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
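; A rough sketch of that bias (branch_weights give each edge its weight over
; the sum of all weights): !1 = {1000, 1} puts 1000 / (1000 + 1) ~= 99.9% on
; 'entry.if.then_crit_edge'.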
  br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1

entry.if.then_crit_edge:
  %.pre14 = load i8, i8* undef, align 1
  br label %if.then

lor.lhs.false:
  br i1 undef, label %if.end, label %exit

exit:
  %cmp.i = fcmp une double 0.000000e+00, undef
  br i1 %cmp.i, label %if.then, label %if.end, !prof !3

if.then:
  %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
  %1 = and i8 %0, 1
  store i8 %1, i8* undef, align 4
  br label %if.end

if.end:
  ret void
}

!1 = !{!"branch_weights", i32 1000, i32 1}
!3 = !{!"branch_weights", i32 1, i32 1000}

declare i32 @f()
declare i32 @g()
declare i32 @h(i32 %x)

define i32 @test_global_cfg_break_profitability() {
; Check that our metrics for the profitability of a CFG break are global rather
; than local. A successor may be very hot, but if the current block isn't, it
; doesn't matter. Within this test the 'then' block is slightly warmer than the
; 'else' block, but not nearly enough to merit merging it with the exit block
; even though the probability of 'then' branching to the 'exit' block is very
; high.
; CHECK: test_global_cfg_break_profitability
; CHECK: calll {{_?}}f
; CHECK: calll {{_?}}g
; CHECK: calll {{_?}}h
; CHECK: ret

entry:
  br i1 undef, label %then, label %else, !prof !2

then:
  %then.result = call i32 @f()
  br label %exit

else:
  %else.result = call i32 @g()
  br label %exit

exit:
  %result = phi i32 [ %then.result, %then ], [ %else.result, %else ]
  %result2 = call i32 @h(i32 %result)
  ret i32 %result
}

!2 = !{!"branch_weights", i32 3, i32 1}

declare i32 @__gxx_personality_v0(...)

define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; Sometimes the landing pad ends up as the first successor of an invoke block.
; When this happens, a strange result used to fall out of updateTerminators: we
; didn't correctly locate the fallthrough successor, blindly assuming that the
; first one was the fallthrough successor. As a result, we would add an
; erroneous jump to the landing pad, thinking *that* was the default successor.
; CHECK-LABEL: test_eh_lpad_successor
; CHECK: %entry
; CHECK-NOT: jmp
; CHECK: %loop

entry:
  invoke i32 @f() to label %preheader unwind label %lpad

preheader:
  br label %loop

lpad:
  %lpad.val = landingpad { i8*, i32 }
          cleanup
  resume { i8*, i32 } %lpad.val

loop:
  br label %loop
}

declare void @fake_throw() noreturn

define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; For blocks containing a 'throw' (or similar functionality), we have
; a no-return invoke. In this case, only EH successors will exist, and
; fallthrough simply won't occur. Make sure we don't crash trying to update
; terminators for such constructs.
;
; CHECK-LABEL: test_eh_throw
; CHECK: %entry
; CHECK: %cleanup

entry:
  invoke void @fake_throw() to label %continue unwind label %cleanup

continue:
  unreachable

cleanup:
  %0 = landingpad { i8*, i32 }
          cleanup
  unreachable
}

define void @test_unnatural_cfg_backwards_inner_loop() {
; Test that when we encounter an unnatural CFG structure after having formed
; a chain for an inner loop which happened to be laid out backwards we don't
; attempt to merge onto the wrong end of the inner loop just because we find it
; first. This was reduced from a crasher in GCC's single source.
;
; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
; CHECK: %loop2b
; CHECK: %loop3

entry:
  br i1 undef, label %loop2a, label %body

body:
  br label %loop2a

loop1:
  %next.load = load i32*, i32** undef
  br i1 %comp.a, label %loop2a, label %loop2b

loop2a:
  %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
  %comp.a = icmp eq i32* %var, null
  br label %loop3

loop2b:
  %gep = getelementptr inbounds i32, i32* %var.phi, i32 0
  %next.ptr = bitcast i32* %gep to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop3

loop3:
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  br label %loop1
}

define void @unanalyzable_branch_to_loop_header() {
; Ensure that we can handle unanalyzable branches into loop headers. We
; pre-form chains for unanalyzable branches, and will find the tail end of that
; at the start of the loop. This function uses floating point comparison
; fallthrough because that happens to always produce unanalyzable branches on
; x86.
;
; CHECK-LABEL: unanalyzable_branch_to_loop_header
; CHECK: %entry
; CHECK: %loop
; CHECK: %exit

entry:
  %cmp = fcmp une double 0.000000e+00, undef
  br i1 %cmp, label %loop, label %exit

loop:
  %cond = icmp eq i8 undef, 42
  br i1 %cond, label %exit, label %loop

exit:
  ret void
}

define void @unanalyzable_branch_to_best_succ(i1 %cond) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the optimal successor to merge.
;
; This branch is now analyzable and hence the destination block becomes the
; hotter one. The right order is entry->bar->exit->foo.
;
; CHECK-LABEL: unanalyzable_branch_to_best_succ
; CHECK: %entry
; CHECK: %bar
; CHECK: %exit
; CHECK: %foo

entry:
  ; Bias this branch toward bar to ensure we form that chain.
  br i1 %cond, label %bar, label %foo, !prof !1

foo:
  %cmp = fcmp une double 0.000000e+00, undef
  br i1 %cmp, label %bar, label %exit

bar:
  call i32 @f()
  br label %exit

exit:
  ret void
}

define void @unanalyzable_branch_to_free_block(float %x) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the best free block in the CFG.
;
; CHECK-LABEL: unanalyzable_branch_to_free_block
; CHECK: %entry
; CHECK: %a
; CHECK: %b
; CHECK: %c
; CHECK: %exit

entry:
  br i1 undef, label %a, label %b

a:
  call i32 @f()
  br label %c

b:
  %cmp = fcmp une float %x, undef
  br i1 %cmp, label %c, label %exit

c:
  call i32 @g()
  br label %exit

exit:
  ret void
}

define void @many_unanalyzable_branches() {
; Ensure that we don't crash as we're building up many unanalyzable branches,
; blocks, and loops.
;
; CHECK-LABEL: many_unanalyzable_branches
; CHECK: %entry
; CHECK: %exit

entry:
  br label %0

  %val0 = load volatile float, float* undef
  %cmp0 = fcmp une float %val0, undef
  br i1 %cmp0, label %1, label %0
  %val1 = load volatile float, float* undef
  %cmp1 = fcmp une float %val1, undef
  br i1 %cmp1, label %2, label %1
  %val2 = load volatile float, float* undef
  %cmp2 = fcmp une float %val2, undef
  br i1 %cmp2, label %3, label %2
  %val3 = load volatile float, float* undef
  %cmp3 = fcmp une float %val3, undef
  br i1 %cmp3, label %4, label %3
  %val4 = load volatile float, float* undef
  %cmp4 = fcmp une float %val4, undef
  br i1 %cmp4, label %5, label %4
  %val5 = load volatile float, float* undef
  %cmp5 = fcmp une float %val5, undef
  br i1 %cmp5, label %6, label %5
  %val6 = load volatile float, float* undef
  %cmp6 = fcmp une float %val6, undef
  br i1 %cmp6, label %7, label %6
  %val7 = load volatile float, float* undef
  %cmp7 = fcmp une float %val7, undef
  br i1 %cmp7, label %8, label %7
  %val8 = load volatile float, float* undef
  %cmp8 = fcmp une float %val8, undef
  br i1 %cmp8, label %9, label %8
  %val9 = load volatile float, float* undef
  %cmp9 = fcmp une float %val9, undef
  br i1 %cmp9, label %10, label %9
  %val10 = load volatile float, float* undef
  %cmp10 = fcmp une float %val10, undef
  br i1 %cmp10, label %11, label %10
  %val11 = load volatile float, float* undef
  %cmp11 = fcmp une float %val11, undef
  br i1 %cmp11, label %12, label %11
  %val12 = load volatile float, float* undef
  %cmp12 = fcmp une float %val12, undef
  br i1 %cmp12, label %13, label %12
  %val13 = load volatile float, float* undef
  %cmp13 = fcmp une float %val13, undef
  br i1 %cmp13, label %14, label %13
  %val14 = load volatile float, float* undef
  %cmp14 = fcmp une float %val14, undef
  br i1 %cmp14, label %15, label %14
  %val15 = load volatile float, float* undef
  %cmp15 = fcmp une float %val15, undef
  br i1 %cmp15, label %16, label %15
  %val16 = load volatile float, float* undef
  %cmp16 = fcmp une float %val16, undef
  br i1 %cmp16, label %17, label %16
  %val17 = load volatile float, float* undef
  %cmp17 = fcmp une float %val17, undef
  br i1 %cmp17, label %18, label %17
  %val18 = load volatile float, float* undef
  %cmp18 = fcmp une float %val18, undef
  br i1 %cmp18, label %19, label %18
  %val19 = load volatile float, float* undef
  %cmp19 = fcmp une float %val19, undef
  br i1 %cmp19, label %20, label %19
  %val20 = load volatile float, float* undef
  %cmp20 = fcmp une float %val20, undef
  br i1 %cmp20, label %21, label %20
  %val21 = load volatile float, float* undef
  %cmp21 = fcmp une float %val21, undef
  br i1 %cmp21, label %22, label %21
  %val22 = load volatile float, float* undef
  %cmp22 = fcmp une float %val22, undef
  br i1 %cmp22, label %23, label %22
  %val23 = load volatile float, float* undef
  %cmp23 = fcmp une float %val23, undef
  br i1 %cmp23, label %24, label %23
  %val24 = load volatile float, float* undef
  %cmp24 = fcmp une float %val24, undef
  br i1 %cmp24, label %25, label %24
  %val25 = load volatile float, float* undef
  %cmp25 = fcmp une float %val25, undef
  br i1 %cmp25, label %26, label %25
  %val26 = load volatile float, float* undef
  %cmp26 = fcmp une float %val26, undef
  br i1 %cmp26, label %27, label %26
  %val27 = load volatile float, float* undef
  %cmp27 = fcmp une float %val27, undef
  br i1 %cmp27, label %28, label %27
  %val28 = load volatile float, float* undef
  %cmp28 = fcmp une float %val28, undef
  br i1 %cmp28, label %29, label %28
  %val29 = load volatile float, float* undef
  %cmp29 = fcmp une float %val29, undef
  br i1 %cmp29, label %30, label %29
  %val30 = load volatile float, float* undef
  %cmp30 = fcmp une float %val30, undef
  br i1 %cmp30, label %31, label %30
  %val31 = load volatile float, float* undef
  %cmp31 = fcmp une float %val31, undef
  br i1 %cmp31, label %32, label %31
  %val32 = load volatile float, float* undef
  %cmp32 = fcmp une float %val32, undef
  br i1 %cmp32, label %33, label %32
  %val33 = load volatile float, float* undef
  %cmp33 = fcmp une float %val33, undef
  br i1 %cmp33, label %34, label %33
  %val34 = load volatile float, float* undef
  %cmp34 = fcmp une float %val34, undef
  br i1 %cmp34, label %35, label %34
  %val35 = load volatile float, float* undef
  %cmp35 = fcmp une float %val35, undef
  br i1 %cmp35, label %36, label %35
  %val36 = load volatile float, float* undef
  %cmp36 = fcmp une float %val36, undef
  br i1 %cmp36, label %37, label %36
  %val37 = load volatile float, float* undef
  %cmp37 = fcmp une float %val37, undef
  br i1 %cmp37, label %38, label %37
  %val38 = load volatile float, float* undef
  %cmp38 = fcmp une float %val38, undef
  br i1 %cmp38, label %39, label %38
  %val39 = load volatile float, float* undef
  %cmp39 = fcmp une float %val39, undef
  br i1 %cmp39, label %40, label %39
  %val40 = load volatile float, float* undef
  %cmp40 = fcmp une float %val40, undef
  br i1 %cmp40, label %41, label %40
  %val41 = load volatile float, float* undef
  %cmp41 = fcmp une float %val41, undef
  br i1 %cmp41, label %42, label %41
  %val42 = load volatile float, float* undef
  %cmp42 = fcmp une float %val42, undef
  br i1 %cmp42, label %43, label %42
  %val43 = load volatile float, float* undef
  %cmp43 = fcmp une float %val43, undef
  br i1 %cmp43, label %44, label %43
  %val44 = load volatile float, float* undef
  %cmp44 = fcmp une float %val44, undef
  br i1 %cmp44, label %45, label %44
  %val45 = load volatile float, float* undef
  %cmp45 = fcmp une float %val45, undef
  br i1 %cmp45, label %46, label %45
  %val46 = load volatile float, float* undef
  %cmp46 = fcmp une float %val46, undef
  br i1 %cmp46, label %47, label %46
  %val47 = load volatile float, float* undef
  %cmp47 = fcmp une float %val47, undef
  br i1 %cmp47, label %48, label %47
  %val48 = load volatile float, float* undef
  %cmp48 = fcmp une float %val48, undef
  br i1 %cmp48, label %49, label %48
  %val49 = load volatile float, float* undef
  %cmp49 = fcmp une float %val49, undef
  br i1 %cmp49, label %50, label %49
  %val50 = load volatile float, float* undef
  %cmp50 = fcmp une float %val50, undef
  br i1 %cmp50, label %51, label %50
  %val51 = load volatile float, float* undef
  %cmp51 = fcmp une float %val51, undef
  br i1 %cmp51, label %52, label %51
  %val52 = load volatile float, float* undef
  %cmp52 = fcmp une float %val52, undef
  br i1 %cmp52, label %53, label %52
  %val53 = load volatile float, float* undef
  %cmp53 = fcmp une float %val53, undef
  br i1 %cmp53, label %54, label %53
  %val54 = load volatile float, float* undef
  %cmp54 = fcmp une float %val54, undef
  br i1 %cmp54, label %55, label %54
  %val55 = load volatile float, float* undef
  %cmp55 = fcmp une float %val55, undef
  br i1 %cmp55, label %56, label %55
  %val56 = load volatile float, float* undef
  %cmp56 = fcmp une float %val56, undef
  br i1 %cmp56, label %57, label %56
  %val57 = load volatile float, float* undef
  %cmp57 = fcmp une float %val57, undef
  br i1 %cmp57, label %58, label %57
  %val58 = load volatile float, float* undef
  %cmp58 = fcmp une float %val58, undef
  br i1 %cmp58, label %59, label %58
  %val59 = load volatile float, float* undef
  %cmp59 = fcmp une float %val59, undef
  br i1 %cmp59, label %60, label %59
  %val60 = load volatile float, float* undef
  %cmp60 = fcmp une float %val60, undef
  br i1 %cmp60, label %61, label %60
  %val61 = load volatile float, float* undef
  %cmp61 = fcmp une float %val61, undef
  br i1 %cmp61, label %62, label %61
  %val62 = load volatile float, float* undef
  %cmp62 = fcmp une float %val62, undef
  br i1 %cmp62, label %63, label %62
  %val63 = load volatile float, float* undef
  %cmp63 = fcmp une float %val63, undef
  br i1 %cmp63, label %64, label %63
  %val64 = load volatile float, float* undef
  %cmp64 = fcmp une float %val64, undef
  br i1 %cmp64, label %65, label %64

  br label %exit
exit:
  ret void
}

define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
; This test case comes from the heapsort benchmark, and exemplifies several
; important aspects to block placement in the presence of loops:
; 1) Loop rotation needs to *ensure* that the desired exiting edge can be
;    a fallthrough.
; 2) The exiting edge from the loop which is rotated to be laid out at the
;    bottom of the loop needs to be exiting into the nearest enclosing loop (to
;    which there is an exit). Otherwise, we force that enclosing loop into
;    strange layouts that are significantly less efficient, often making
;    it discontiguous.
;
; CHECK-LABEL: @benchmark_heapsort
; CHECK: %entry
; First rotated loop top.
; CHECK: .p2align
; CHECK: %while.end
; %for.cond gets completely tail-duplicated away.
; CHECK: %if.then
; CHECK: %if.else
; CHECK: %if.end10
; Second rotated loop top
; CHECK: .p2align
; CHECK: %if.then24
; CHECK: %while.cond.outer
; Third rotated loop top
; CHECK: .p2align
; CHECK: %while.cond
; CHECK: %while.body
; CHECK: %land.lhs.true
; CHECK: %if.then19
; CHECK: %if.end20
; CHECK: %if.then8
; CHECK: ret

entry:
  %shr = ashr i32 %n, 1
  %add = add nsw i32 %shr, 1
  %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1
  br label %for.cond

for.cond:
  %ir.0 = phi i32 [ %n, %entry ], [ %ir.1, %while.end ]
  %l.0 = phi i32 [ %add, %entry ], [ %l.1, %while.end ]
  %cmp = icmp sgt i32 %l.0, 1
  br i1 %cmp, label %if.then, label %if.else

if.then:
  %dec = add nsw i32 %l.0, -1
  %idxprom = sext i32 %dec to i64
  %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom
  %0 = load double, double* %arrayidx, align 8
  br label %if.end10

if.else:
  %idxprom1 = sext i32 %ir.0 to i64
  %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1
  %1 = load double, double* %arrayidx2, align 8
  %2 = load double, double* %arrayidx3, align 8
  store double %2, double* %arrayidx2, align 8
  %dec6 = add nsw i32 %ir.0, -1
  %cmp7 = icmp eq i32 %dec6, 1
  br i1 %cmp7, label %if.then8, label %if.end10

if.then8:
  store double %1, double* %arrayidx3, align 8
  ret void

if.end10:
  %ir.1 = phi i32 [ %ir.0, %if.then ], [ %dec6, %if.else ]
  %l.1 = phi i32 [ %dec, %if.then ], [ %l.0, %if.else ]
  %rra.0 = phi double [ %0, %if.then ], [ %1, %if.else ]
  %add31 = add nsw i32 %ir.1, 1
  br label %while.cond.outer

while.cond.outer:
  %j.0.ph.in = phi i32 [ %l.1, %if.end10 ], [ %j.1, %if.then24 ]
  %j.0.ph = shl i32 %j.0.ph.in, 1
  br label %while.cond

while.cond:
  %j.0 = phi i32 [ %add31, %if.end20 ], [ %j.0.ph, %while.cond.outer ]
  %cmp11 = icmp sgt i32 %j.0, %ir.1
  br i1 %cmp11, label %while.end, label %while.body

while.body:
  %cmp12 = icmp slt i32 %j.0, %ir.1
  br i1 %cmp12, label %land.lhs.true, label %if.end20

land.lhs.true:
  %idxprom13 = sext i32 %j.0 to i64
  %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13
  %3 = load double, double* %arrayidx14, align 8
  %add15 = add nsw i32 %j.0, 1
  %idxprom16 = sext i32 %add15 to i64
  %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16
  %4 = load double, double* %arrayidx17, align 8
  %cmp18 = fcmp olt double %3, %4
  br i1 %cmp18, label %if.then19, label %if.end20

if.then19:
  br label %if.end20

if.end20:
  %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
  %idxprom21 = sext i32 %j.1 to i64
  %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21
  %5 = load double, double* %arrayidx22, align 8
  %cmp23 = fcmp olt double %rra.0, %5
  br i1 %cmp23, label %if.then24, label %while.cond

if.then24:
  %idxprom27 = sext i32 %j.0.ph.in to i64
  %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27
  store double %5, double* %arrayidx28, align 8
  br label %while.cond.outer

while.end:
  %idxprom33 = sext i32 %j.0.ph.in to i64
  %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33
  store double %rra.0, double* %arrayidx34, align 8
  br label %for.cond
}

declare void @cold_function() cold

define i32 @test_cold_calls(i32* %a) {
; Test that edges to blocks post-dominated by cold calls are
; marked as not expected to be taken. They should be laid out
; at the bottom.
; CHECK-LABEL: test_cold_calls:
; CHECK: %entry
; CHECK: %else
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else

then:
  call void @cold_function()
  br label %exit

else:
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  br label %exit

exit:
  %ret = phi i32 [ %val1, %then ], [ %val2, %else ]
  ret i32 %ret
}

; Make sure we put landingpads out of the way.
declare i32 @pers(...)

declare i32 @foo();

declare i32 @bar();

define i32 @test_lp(i32 %a) personality i32 (...)* @pers {
; CHECK-LABEL: test_lp:
; CHECK: %entry
; CHECK: %hot
; CHECK: %then
; CHECK: %cold
; CHECK: %coldlp
; CHECK: %hotlp
; CHECK: %lpret
entry:
  %0 = icmp sgt i32 %a, 1
  br i1 %0, label %hot, label %cold, !prof !4

hot:
  %1 = invoke i32 @foo()
          to label %then unwind label %hotlp

cold:
  %2 = invoke i32 @bar()
          to label %then unwind label %coldlp

then:
  %3 = phi i32 [ %1, %hot ], [ %2, %cold ]
  ret i32 %3

hotlp:
  %4 = landingpad { i8*, i32 }
          cleanup
  br label %lpret

coldlp:
  %5 = landingpad { i8*, i32 }
          cleanup
  br label %lpret

lpret:
  %6 = phi i32 [-1, %hotlp], [-2, %coldlp]
  %7 = add i32 %6, 42
  ret i32 %7
}

!4 = !{!"branch_weights", i32 65536, i32 0}
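; For reference: a zero weight makes the %cold edge as unlikely as the
; metadata can express, so %hot (weight 65536) is treated as essentially
; always taken and the cold invoke and its landing pad sink in the layout.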

; Make sure that EH pads are scheduled from the least probable one
; to the most probable one. See selectBestCandidateBlock as to why.
declare void @clean();

define void @test_flow_unwind() personality i32 (...)* @pers {
; CHECK-LABEL: test_flow_unwind:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %innerlp
; CHECK: %outerlp
; CHECK: %outercleanup
entry:
  %0 = invoke i32 @foo()
          to label %then unwind label %outerlp

then:
  %1 = invoke i32 @bar()
          to label %exit unwind label %innerlp

exit:
  ret void

innerlp:
  %2 = landingpad { i8*, i32 }
          cleanup
  br label %innercleanup

outerlp:
  %3 = landingpad { i8*, i32 }
          cleanup
  br label %outercleanup

outercleanup:
  %4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp]
  call void @clean()
  resume { i8*, i32 } %4

innercleanup:
  call void @clean()
  br label %outercleanup
}

declare void @hot_function()

define void @test_hot_branch(i32* %a) {
; Test that a hot branch that has a probability a little larger than 80% will
; break CFG constraints when doing block placement.
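; Sketch of the arithmetic: !5 = {84, 16} gives the %then edge
; 84 / (84 + 16) = 84%, just over that 80% threshold, so %exit is laid out
; right after %then and the cold %else is sunk.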
; CHECK-LABEL: test_hot_branch:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 50% will
; break CFG constraints when doing block placement when a profile is available.
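; Sketch: !7 = {60, 40} gives the %then edge 60 / (60 + 40) = 60%; as noted
; above, once the function carries a profile (!6), a probability only a little
; over 50% is already enough to break the constraints.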
; CHECK-LABEL: test_hot_branch_profile:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %else

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !7

then:
  call void @hot_function()
  br label %exit

else:
  call void @cold_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 {
; Test that a hot branch that has a probability a little larger than 80% will
; break triangle-shaped CFG constraints when doing block placement if a profile
; is present.
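; Sketch: the hot edge (84 / (84 + 16) = 84%, from !5) runs from %entry
; directly to %exit, so %exit is placed before %then, breaking the triangle.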
; CHECK-LABEL: test_hot_branch_triangle_profile:
; CHECK: %entry
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !5

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 {
; Test that a hot branch that has a probability between 50% and 66% will not
; break triangle-shaped CFG constraints when doing block placement if a profile
; is present.
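; Sketch: 60 / (60 + 40) = 60% lies between 50% and 66%, so the triangle is
; kept in topological order and %then stays between %entry and %exit.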
; CHECK-LABEL: test_hot_branch_triangle_profile_topology:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit

entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %exit, label %then, !prof !7

then:
  call void @hot_function()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

declare void @a()
declare void @b()

define void @test_forked_hot_diamond(i32* %a) {
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
; will not place the cold predecessor if the probability for the fallthrough
; remains above 80%
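; A rough sketch, assuming frequencies compose by multiplication:
; freq(%then) ~= 0.84 and freq(%else) ~= 0.16, and !8 splits each ~50/50, so
; %then->%fork1 carries ~0.42 against ~0.08 from %else->%fork1, i.e. a
; relative probability of 0.42 / (0.42 + 0.08) = 84% > 80%, and %fork1 can
; fall through from %then without pulling the cold %else forward.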
; CHECK-LABEL: test_forked_hot_diamond
; CHECK: %entry
; CHECK: %then
; CHECK: %fork1
; CHECK: %else
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else, !prof !5

then:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %fork1, label %fork2, !prof !8

else:
  call void @cold_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep3
  %cond3 = icmp ugt i32 %val3, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_gets_cold(i32* %a) {
; Test that a hot-branch with probability > 80% followed by a 50/50 branch
; will place the cold predecessor if the probability for the fallthrough
; falls below 80%
; The probability for both branches is 85%. For then2 vs else1
; this results in a compounded probability of 83%.
; Neither then2->fork1 nor then2->fork2 has a large enough relative
; probability to break the CFG.
; Relative probs:
; then2 -> fork1 vs else1 -> fork1 = 71%
; then2 -> fork2 vs else2 -> fork2 = 74%
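; A rough sketch of the numbers above, assuming frequencies compose by
; multiplication: freq(%then2) ~= 0.85 * 0.85 ~= 0.72, so then2 vs else1
; (0.15) is 0.72 / (0.72 + 0.15) ~= 83%. For the forks:
;   then2->fork1 vs else1->fork1: 0.36 / (0.36 + 0.15)        ~= 71%
;   then2->fork2 vs else2->fork2: 0.36 / (0.36 + 0.85 * 0.15) ~= 74%
; Neither clears the ~80% needed to keep the cold predecessors out of line.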
; CHECK-LABEL: test_forked_hot_diamond_gets_cold
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %else1
; CHECK: %fork1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !9

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !9

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

define void @test_forked_hot_diamond_stays_hot(i32* %a) {
; Test that a hot branch with probability > 88.88% (1:8) followed by a 50/50
; branch will not place the cold predecessor, as the probability for the
; fallthrough stays above 80%:
; (1:8) followed by (1:1) is still (1:4).
; Here we use a 90% probability because two such branches in a row
; have an 89% probability vs. the original branch.
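; Sketch: freq(%then2) ~= 0.9 * 0.9 = 0.81, and relative to %else1 (0.1)
; that is 0.81 / (0.81 + 0.1) ~= 89%, the figure quoted above, so the
; fallthrough chain %entry->%then1->%then2->%fork1 stays above the ~80%
; threshold.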
; CHECK-LABEL: test_forked_hot_diamond_stays_hot
; CHECK: %entry
; CHECK: %then1
; CHECK: %then2
; CHECK: %fork1
; CHECK: %else1
; CHECK: %else2
; CHECK: %fork2
; CHECK: %exit
entry:
  %gep1 = getelementptr i32, i32* %a, i32 1
  %val1 = load i32, i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then1, label %else1, !prof !10

then1:
  call void @hot_function()
  %gep2 = getelementptr i32, i32* %a, i32 2
  %val2 = load i32, i32* %gep2
  %cond2 = icmp ugt i32 %val2, 2
  br i1 %cond2, label %then2, label %else2, !prof !10

else1:
  call void @cold_function()
  br label %fork1

then2:
  call void @hot_function()
  %gep3 = getelementptr i32, i32* %a, i32 3
  %val3 = load i32, i32* %gep2
  %cond3 = icmp ugt i32 %val2, 3
  br i1 %cond3, label %fork1, label %fork2, !prof !8

else2:
  call void @cold_function()
  br label %fork2

fork1:
  call void @a()
  br label %exit

fork2:
  call void @b()
  br label %exit

exit:
  call void @hot_function()
  ret void
}

; Because %endif has a higher frequency than %if, the calculations show we
; shouldn't tail-duplicate %endif so that we can place it after %if. We were
; previously undercounting the cost by ignoring execution frequency that didn't
; come from the %if->%endif path.
; CHECK-LABEL: higher_frequency_succ_tail_dup
; CHECK: %entry
; CHECK: %elseif
; CHECK: %else
; CHECK: %endif
; CHECK: %then
; CHECK: %ret
define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) {
entry:
  br label %if
if:                                               ; preds = %entry
  call void @effect(i32 0)
  br i1 %a, label %elseif, label %endif, !prof !11 ; even

elseif:                                           ; preds = %if
  call void @effect(i32 1)
  br i1 %b, label %else, label %endif, !prof !11 ; even

else:                                             ; preds = %elseif
  call void @effect(i32 2)
  br label %endif

endif:                                            ; preds = %if, %elseif, %else
  br i1 %c, label %then, label %ret, !prof !12 ; 5 to 3

then:                                             ; preds = %endif
  call void @effect(i32 3)
  br label %ret

ret:                                              ; preds = %endif, %then
  ret void
}

define i32 @not_rotate_if_extra_branch(i32 %count) {
; This test checks that there is no loop rotation
; if it would introduce an extra branch.
; Specifically, in this case the best exit is .header,
; but it has a fallthrough to the .middle block, and the last block in the
; loop chain, .slow, does not have a fallthrough to .header.
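; For reference: !13 = {1, 1} leaves the bailout check unbiased, !14 =
; {1, 1023} makes the .slow path rare, and !15 = {4095, 1} makes the backedge
; dominate, so .slow and .bailout are laid out after the hot loop chain.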
; CHECK-LABEL: not_rotate_if_extra_branch
; CHECK: %.entry
; CHECK: %.header
; CHECK: %.middle
; CHECK: %.backedge
; CHECK: %.slow
; CHECK: %.bailout
; CHECK: %.stop
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.header

.header:
  %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
  %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
  %is_exc = icmp sgt i32 %i, 9000000
  br i1 %is_exc, label %.bailout, label %.middle, !prof !13

.bailout:
  %sum.2 = add nsw i32 %count, 1
  br label %.stop

.middle:
  %pr.1 = and i32 %i, 1023
  %pr.2 = icmp eq i32 %pr.1, 0
  br i1 %pr.2, label %.slow, label %.backedge, !prof !14

.slow:
  tail call void @effect(i32 %sum)
  br label %.backedge

.backedge:
  %sum.1 = add nsw i32 %i, %sum
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.header, label %.stop, !prof !15

.stop:
  %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
  ret i32 %sum.phi
}

define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
; This is a regression test for the patch that avoids loop rotation if
; it would introduce an extra branch.
; CHECK-LABEL: not_rotate_if_extra_branch_regression
; CHECK: %.entry
; CHECK: %.first_backedge
; CHECK: %.slow
; CHECK: %.second_header
.entry:
  %sum.0 = shl nsw i32 %count, 1
  br label %.first_header

.first_header:
  %i = phi i32 [ %i.1, %.first_backedge ], [ 0, %.entry ]
  %is_bo1 = icmp sgt i32 %i, 9000000
  br i1 %is_bo1, label %.bailout, label %.first_backedge, !prof !14

.first_backedge:
  %i.1 = add nsw i32 %i, 1
  %end = icmp slt i32 %i.1, %count
  br i1 %end, label %.first_header, label %.second_header, !prof !13

.second_header:
  %j = phi i32 [ %j.1, %.second_backedge ], [ %init, %.first_backedge ]
  %end.2 = icmp sgt i32 %j, %count
  br i1 %end.2, label %.stop, label %.second_middle, !prof !14

.second_middle:
  %is_slow = icmp sgt i32 %j, 9000000
  br i1 %is_slow, label %.slow, label %.second_backedge, !prof !14

.slow:
  tail call void @effect(i32 %j)
  br label %.second_backedge

.second_backedge:
  %j.1 = add nsw i32 %j, 1
  %end.3 = icmp slt i32 %j, 10000000
  br i1 %end.3, label %.second_header, label %.stop, !prof !13

.stop:
  %res = add nsw i32 %j, %i.1
  ret i32 %res

.bailout:
  ret i32 0
}

declare void @effect(i32)

!5 = !{!"branch_weights", i32 84, i32 16}
!6 = !{!"function_entry_count", i32 10}
!7 = !{!"branch_weights", i32 60, i32 40}
!8 = !{!"branch_weights", i32 5001, i32 4999}
!9 = !{!"branch_weights", i32 85, i32 15}
!10 = !{!"branch_weights", i32 90, i32 10}
!11 = !{!"branch_weights", i32 1, i32 1}
!12 = !{!"branch_weights", i32 5, i32 3}
!13 = !{!"branch_weights", i32 1, i32 1}
!14 = !{!"branch_weights", i32 1, i32 1023}
!15 = !{!"branch_weights", i32 4095, i32 1}