Home | History | Annotate | Download | only in PowerPC
      1 ; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
      2 ; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O2 %s
      3 ; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
      4 ; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O3 %s
      5 target datalayout = "e-m:e-i64:64-n32:64"
      6 target triple = "powerpc64le-grtev4-linux-gnu"
      7 
      8 ; Intended layout:
      9 ; The chain-based outlining produces the layout
     10 ; test1
     11 ; test2
     12 ; test3
     13 ; test4
     14 ; optional1
     15 ; optional2
     16 ; optional3
     17 ; optional4
     18 ; exit
     19 ; Tail duplication puts test n+1 at the end of optional n
     20 ; so optional1 includes a copy of test2 at the end, and branches
     21 ; to test3 (at the top) or falls through to optional 2.
     22 ; The CHECK statements check for the whole string of tests
     23 ; and then check that the correct test has been duplicated into the end of
     24 ; the optional blocks and that the optional blocks are in the correct order.
     25 ;CHECK-LABEL: straight_test:
     26 ; test1 may have been merged with entry
     27 ;CHECK: mr [[TAGREG:[0-9]+]], 3
     28 ;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1
     29 ;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
     30 ;CHECK-NEXT: # %test2
     31 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
     32 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
     33 ;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
     34 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
     35 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
     36 ;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
     37 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
     38 ;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
     39 ;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
     40 ;CHECK: blr
     41 ;CHECK-NEXT: .[[OPT1LABEL]]:
     42 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
     43 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
     44 ;CHECK-NEXT: .[[OPT2LABEL]]:
     45 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
     46 ;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
     47 ;CHECK-NEXT: .[[OPT3LABEL]]:
     48 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
     49 ;CHECK-NEXT: beq 0, .[[EXITLABEL]]
     50 ;CHECK-NEXT: .[[OPT4LABEL]]:
     51 ;CHECK: b .[[EXITLABEL]]
     52 
     53 define void @straight_test(i32 %tag) {
     54 entry:
     55   br label %test1
     56 test1:
     57   %tagbit1 = and i32 %tag, 1
     58   %tagbit1eq0 = icmp eq i32 %tagbit1, 0
     59   br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
     60 optional1:
     61   call void @a()
     62   call void @a()
     63   call void @a()
     64   call void @a()
     65   br label %test2
     66 test2:
     67   %tagbit2 = and i32 %tag, 2
     68   %tagbit2eq0 = icmp eq i32 %tagbit2, 0
     69   br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
     70 optional2:
     71   call void @b()
     72   call void @b()
     73   call void @b()
     74   call void @b()
     75   br label %test3
     76 test3:
     77   %tagbit3 = and i32 %tag, 4
     78   %tagbit3eq0 = icmp eq i32 %tagbit3, 0
     79   br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
     80 optional3:
     81   call void @c()
     82   call void @c()
     83   call void @c()
     84   call void @c()
     85   br label %test4
     86 test4:
     87   %tagbit4 = and i32 %tag, 8
     88   %tagbit4eq0 = icmp eq i32 %tagbit4, 0
     89   br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
     90 optional4:
     91   call void @d()
     92   call void @d()
     93   call void @d()
     94   call void @d()
     95   br label %exit
     96 exit:
     97   ret void
     98 }
     99 
    100 ; Intended layout:
    101 ; The chain-of-triangles based duplicating produces the layout
    102 ; test1
    103 ; test2
    104 ; test3
    105 ; optional1
    106 ; optional2
    107 ; optional3
    108 ; exit
    109 ; even for 50/50 branches.
    110 ; Tail duplication puts test n+1 at the end of optional n
    111 ; so optional1 includes a copy of test2 at the end, and branches
    112 ; to test3 (at the top) or falls through to optional 2.
    113 ; The CHECK statements check for the whole string of tests
    114 ; and then check that the correct test has been duplicated into the end of
    115 ; the optional blocks and that the optional blocks are in the correct order.
    116 ;CHECK-LABEL: straight_test_50:
    117 ; test1 may have been merged with entry
    118 ;CHECK: mr [[TAGREG:[0-9]+]], 3
    119 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
    120 ;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
    121 ;CHECK-NEXT: # %test2
    122 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
    123 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
    124 ;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
    125 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
    126 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
    127 ;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
    128 ;CHECK: blr
    129 ;CHECK-NEXT: .[[OPT1LABEL]]:
    130 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
    131 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
    132 ;CHECK-NEXT: .[[OPT2LABEL]]:
    133 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
    134 ;CHECK-NEXT: beq 0, .[[EXITLABEL]]
    135 ;CHECK-NEXT: .[[OPT3LABEL]]:
    136 ;CHECK: b .[[EXITLABEL]]
    137 
    138 define void @straight_test_50(i32 %tag) {
    139 entry:
    140   br label %test1
    141 test1:
    142   %tagbit1 = and i32 %tag, 1
    143   %tagbit1eq0 = icmp eq i32 %tagbit1, 0
    144   br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
    145 optional1:
    146   call void @a()
    147   br label %test2
    148 test2:
    149   %tagbit2 = and i32 %tag, 2
    150   %tagbit2eq0 = icmp eq i32 %tagbit2, 0
    151   br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
    152 optional2:
    153   call void @b()
    154   br label %test3
    155 test3:
    156   %tagbit3 = and i32 %tag, 4
    157   %tagbit3eq0 = icmp eq i32 %tagbit3, 0
    158   br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
    159 optional3:
    160   call void @c()
    161   br label %exit
    162 exit:
    163   ret void
    164 }
    165 
    166 ; Intended layout:
    167 ; The chain-of-triangles based duplicating produces the layout when 3
    168 ; instructions are allowed for tail-duplication.
    169 ; test1
    170 ; test2
    171 ; test3
    172 ; optional1
    173 ; optional2
    174 ; optional3
    175 ; exit
    176 ;
    177 ; Otherwise it produces the layout:
    178 ; test1
    179 ; optional1
    180 ; test2
    181 ; optional2
    182 ; test3
    183 ; optional3
    184 ; exit
    185 
    186 ;CHECK-LABEL: straight_test_3_instr_test:
    187 ; test1 may have been merged with entry
    188 ;CHECK: mr [[TAGREG:[0-9]+]], 3
    189 ;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
    190 ;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
    191 
    192 ;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
    193 ;CHECK-O3-NEXT: # %test2
    194 ;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
    195 ;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
    196 ;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
    197 ;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
    198 ;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
    199 ;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
    200 ;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
    201 ;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
    202 ;CHECK-O3: blr
    203 ;CHECK-O3-NEXT: .[[OPT1LABEL]]:
    204 ;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
    205 ;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
    206 ;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
    207 ;CHECK-O3-NEXT: .[[OPT2LABEL]]:
    208 ;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
    209 ;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
    210 ;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
    211 ;CHECK-O3-NEXT: .[[OPT3LABEL]]:
    212 ;CHECK-O3: b .[[EXITLABEL]]
    213 
    214 ;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
    215 ;CHECK-O2-NEXT: # %optional1
    216 ;CHECK-O2: .[[TEST2LABEL]]: # %test2
    217 ;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
    218 ;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
    219 ;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
    220 ;CHECK-O2-NEXT: # %optional2
    221 ;CHECK-O2: .[[TEST3LABEL]]: # %test3
    222 ;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
    223 ;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
    224 ;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
    225 ;CHECK-O2-NEXT: # %optional3
    226 ;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
    227 ;CHECK-O2: blr
    228 
    229 
    230 define void @straight_test_3_instr_test(i32 %tag) {
    231 entry:
    232   br label %test1
    233 test1:
    234   %tagbit1 = and i32 %tag, 3
    235   %tagbit1eq0 = icmp eq i32 %tagbit1, 2
    236   br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
    237 optional1:
    238   call void @a()
    239   br label %test2
    240 test2:
    241   %tagbit2 = and i32 %tag, 12
    242   %tagbit2eq0 = icmp eq i32 %tagbit2, 8
    243   br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
    244 optional2:
    245   call void @b()
    246   br label %test3
    247 test3:
    248   %tagbit3 = and i32 %tag, 48
    249   %tagbit3eq0 = icmp eq i32 %tagbit3, 32
    250   br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
    251 optional3:
    252   call void @c()
    253   br label %exit
    254 exit:
    255   ret void
    256 }
    257 
    258 ; Intended layout:
    259 ; The chain-based outlining produces the layout
    260 ; entry
    261 ; --- Begin loop ---
    262 ; for.latch
    263 ; for.check
    264 ; test1
    265 ; test2
    266 ; test3
    267 ; test4
    268 ; optional1
    269 ; optional2
    270 ; optional3
    271 ; optional4
    272 ; --- End loop ---
    273 ; exit
    274 ; The CHECK statements check for the whole string of tests and exit block,
    275 ; and then check that the correct test has been duplicated into the end of
    276 ; the optional blocks and that the optional blocks are in the correct order.
    277 ;CHECK-LABEL: loop_test:
    278 ;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
    279 ;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
    280 ;CHECK: addi
    281 ;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
    282 ;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
    283 ;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
    284 ;CHECK: # %bb.{{[0-9]+}}: # %test1
    285 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
    286 ;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
    287 ;CHECK-NEXT: # %test2
    288 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
    289 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
    290 ;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
    291 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
    292 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
    293 ;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
    294 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
    295 ;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
    296 ;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
    297 ;CHECK: [[OPT1LABEL]]
    298 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
    299 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
    300 ;CHECK-NEXT: .[[OPT2LABEL]]
    301 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
    302 ;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
    303 ;CHECK-NEXT: .[[OPT3LABEL]]
    304 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
    305 ;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
    306 ;CHECK: [[OPT4LABEL]]:
    307 ;CHECK: b .[[LATCHLABEL]]
    308 define void @loop_test(i32* %tags, i32 %count) {
    309 entry:
    310   br label %for.check
    311 for.check:
    312   %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
    313   %done.count = icmp ugt i32 %count.loop, 0
    314   %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
    315   %tag = load i32, i32* %tag_ptr
    316   %done.tag = icmp eq i32 %tag, 0
    317   %done = and i1 %done.count, %done.tag
    318   br i1 %done, label %test1, label %exit, !prof !1
    319 test1:
    320   %tagbit1 = and i32 %tag, 1
    321   %tagbit1eq0 = icmp eq i32 %tagbit1, 0
    322   br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
    323 optional1:
    324   call void @a()
    325   call void @a()
    326   call void @a()
    327   call void @a()
    328   br label %test2
    329 test2:
    330   %tagbit2 = and i32 %tag, 2
    331   %tagbit2eq0 = icmp eq i32 %tagbit2, 0
    332   br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
    333 optional2:
    334   call void @b()
    335   call void @b()
    336   call void @b()
    337   call void @b()
    338   br label %test3
    339 test3:
    340   %tagbit3 = and i32 %tag, 4
    341   %tagbit3eq0 = icmp eq i32 %tagbit3, 0
    342   br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
    343 optional3:
    344   call void @c()
    345   call void @c()
    346   call void @c()
    347   call void @c()
    348   br label %test4
    349 test4:
    350   %tagbit4 = and i32 %tag, 8
    351   %tagbit4eq0 = icmp eq i32 %tagbit4, 0
    352   br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
    353 optional4:
    354   call void @d()
    355   call void @d()
    356   call void @d()
    357   call void @d()
    358   br label %for.latch
    359 for.latch:
    360   %count.sub = sub i32 %count.loop, 1
    361   br label %for.check
    362 exit:
    363   ret void
    364 }
    365 
    366 ; The block then2 is not unavoidable, meaning it does not dominate the exit.
    367 ; But since it can be tail-duplicated, it should be placed as a fallthrough from
    368 ; test2 and copied. The purpose here is to make sure that the tail-duplication
    369 ; code is independent of the outlining code, which works by choosing the
    370 ; "unavoidable" blocks.
    371 ; CHECK-LABEL: avoidable_test:
    372 ; CHECK: # %bb.{{[0-9]+}}: # %entry
    373 ; CHECK: andi.
    374 ; CHECK: # %bb.{{[0-9]+}}: # %test2
    375 ; Make sure then2 falls through from test2
    376 ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
    377 ; CHECK: # %bb.{{[0-9]+}}: # %then2
    378 ; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
    379 ; CHECK: # %else1
    380 ; CHECK: bl a
    381 ; CHECK: bl a
    382 ; Make sure then2 was copied into else1
    383 ; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
    384 ; CHECK: # %end1
    385 ; CHECK: bl d
    386 ; CHECK: # %else2
    387 ; CHECK: bl c
    388 ; CHECK: # %end2
    389 define void @avoidable_test(i32 %tag) {
    390 entry:
    391   br label %test1
    392 test1:
    393   %tagbit1 = and i32 %tag, 1
    394   %tagbit1eq0 = icmp eq i32 %tagbit1, 0
    395   br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
    396 else1:
    397   call void @a()
    398   call void @a()
    399   br label %then2
    400 test2:
    401   %tagbit2 = and i32 %tag, 2
    402   %tagbit2eq0 = icmp eq i32 %tagbit2, 0
    403   br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
    404 then2:
    405   %tagbit3 = and i32 %tag, 4
    406   %tagbit3eq0 = icmp eq i32 %tagbit3, 0
    407   br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
    408 else2:
    409   call void @c()
    410   br label %end2
    411 end2:
    412   ret void
    413 end1:
    414   call void @d()
    415   ret void
    416 }
    417 
    418 ; CHECK-LABEL: trellis_test
    419 ; The number in the block labels is the expected block frequency given the
    420 ; probabilities annotated. There is a conflict in the b;c->d;e trellis that
    421 ; should be resolved as c->e;b->d.
    422 ; The d;e->f;g trellis should be resolved as e->g;d->f.
    423 ; The f;g->h;i trellis should be resolved as f->i;g->h.
    424 ; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
    425 ; h->j->ret
    426 ; CHECK: # %bb.{{[0-9]+}}: # %entry
    427 ; CHECK: # %bb.{{[0-9]+}}: # %c10
    428 ; CHECK: # %e9
    429 ; CHECK: # %g10
    430 ; CHECK: # %h10
    431 ; CHECK: # %j8
    432 ; CHECK: # %ret
    433 ; CHECK: # %b6
    434 ; CHECK: # %d7
    435 ; CHECK: # %f6
    436 ; CHECK: # %i6
    437 define void @trellis_test(i32 %tag) {
    438 entry:
    439   br label %a16
    440 a16:
    441   call void @a()
    442   call void @a()
    443   %tagbits.a = and i32 %tag, 3
    444   %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
    445   br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
    446 c10:
    447   call void @c()
    448   call void @c()
    449   %tagbits.c = and i32 %tag, 12
    450   %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
    451   ; Both of these edges should be hotter than the other incoming edge
    452   ; for e9 or d7
    453   br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
    454 e9:
    455   call void @e()
    456   call void @e()
    457   %tagbits.e = and i32 %tag, 48
    458   %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
    459   br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
    460 g10:
    461   call void @g()
    462   call void @g()
    463   %tagbits.g = and i32 %tag, 192
    464   %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
    465   br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
    466 i6:
    467   call void @i()
    468   call void @i()
    469   %tagbits.i = and i32 %tag, 768
    470   %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
    471   br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
    472 b6:
    473   call void @b()
    474   call void @b()
    475   %tagbits.b = and i32 %tag, 12
    476   %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
    477   br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
    478 d7:
    479   call void @d()
    480   call void @d()
    481   %tagbits.d = and i32 %tag, 48
    482   %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
    483   br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
    484 f6:
    485   call void @f()
    486   call void @f()
    487   %tagbits.f = and i32 %tag, 192
    488   %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
    489   br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
    490 h10:
    491   call void @h()
    492   call void @h()
    493   %tagbits.h = and i32 %tag, 768
    494   %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
    495   br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
    496 j8:
    497   call void @j()
    498   call void @j()
    499   br label %ret
    500 ret:
    501   ret void
    502 }
    503 
    504 ; Verify that we still consider tail-duplication opportunities if we find a
    505 ; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
    506 ; of both F and G. The basic trellis algorithm picks the F->G edge, but after
    507 ; checking, it's profitable to duplicate G into F. The weights here are not
    508 ; really important. They are there to help make the test stable.
    509 ; CHECK-LABEL: trellis_then_dup_test
    510 ; CHECK: # %bb.{{[0-9]+}}: # %entry
    511 ; CHECK: # %bb.{{[0-9]+}}: # %b
    512 ; CHECK: # %d
    513 ; CHECK: # %g
    514 ; CHECK: # %ret1
    515 ; CHECK: # %c
    516 ; CHECK: # %e
    517 ; CHECK: # %f
    518 ; CHECK: # %ret2
    519 ; CHECK: # %ret
    520 define void @trellis_then_dup_test(i32 %tag) {
    521 entry:
    522   br label %a
    523 a:
    524   call void @a()
    525   call void @a()
    526   %tagbits.a = and i32 %tag, 3
    527   %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
    528   br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
    529 b:
    530   call void @b()
    531   call void @b()
    532   %tagbits.b = and i32 %tag, 12
    533   %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
    534   br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
    535 d:
    536   call void @d()
    537   call void @d()
    538   %tagbits.d = and i32 %tag, 48
    539   %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
    540   br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
    541 f:
    542   call void @f()
    543   call void @f()
    544   br label %g
    545 g:
    546   %tagbits.g = and i32 %tag, 192
    547   %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
    548   br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
    549 c:
    550   call void @c()
    551   call void @c()
    552   %tagbits.c = and i32 %tag, 12
    553   %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
    554   br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
    555 e:
    556   call void @e()
    557   call void @e()
    558   %tagbits.e = and i32 %tag, 48
    559   %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
    560   br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
    561 ret1:
    562   call void @a()
    563   br label %ret
    564 ret2:
    565   call void @b()
    566   br label %ret
    567 ret:
    568   ret void
    569 }
    570 
    571 ; Verify that we did not mis-identify triangle trellises if it is not
    572 ; really a triangle.
    573 ; CHECK-LABEL: trellis_no_triangle
    574 ; CHECK: # %bb.{{[0-9]+}}: # %entry
    575 ; CHECK: # %bb.{{[0-9]+}}: # %b
    576 ; CHECK: # %d
    577 ; CHECK: # %ret
    578 ; CHECK: # %c
    579 ; CHECK: # %e
    580 define void @trellis_no_triangle(i32 %tag) {
    581 entry:
    582   br label %a
    583 a:
    584   call void @a()
    585   call void @a()
    586   %tagbits.a = and i32 %tag, 3
    587   %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
    588   br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
    589 b:
    590   call void @b()
    591   call void @b()
    592   %tagbits.b = and i32 %tag, 12
    593   %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
    594   br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
    595 d:
    596   call void @d()
    597   call void @d()
    598   %tagbits.d = and i32 %tag, 48
    599   %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
    600   br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
    601 c:
    602   call void @c()
    603   call void @c()
    604   %tagbits.c = and i32 %tag, 12
    605   %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
    606   br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
    607 e:
    608   call void @e()
    609   call void @e()
    610   br label %ret
    611 ret:
    612   call void @f()
    613   ret void
    614 }
    615 
; External functions called by the test functions in this file. They are
; declared here without bodies, take no arguments and return nothing.
declare void @a()
declare void @b()
declare void @c()
declare void @d()
declare void @e()
declare void @f()
declare void @g()
declare void @h()
declare void @i()
declare void @j()
    626 
; Branch-weight profiles attached through !prof to the conditional branches in
; this file. The first weight belongs to the first (true) successor of the
; branch, the second weight to the second (false) successor.
!1 = !{!"branch_weights", i32 5, i32 3}   ; 5:3, first successor favoured
!2 = !{!"branch_weights", i32 50, i32 50} ; balanced
!3 = !{!"branch_weights", i32 6, i32 4}   ; 6:4
!4 = !{!"branch_weights", i32 7, i32 2}   ; 7:2
!5 = !{!"branch_weights", i32 2, i32 8}   ; 2:8, second successor favoured
!6 = !{!"branch_weights", i32 3, i32 4}   ; 3:4
!7 = !{!"branch_weights", i32 4, i32 2}   ; 4:2
!8 = !{!"branch_weights", i32 98, i32 2}  ; 98:2
!9 = !{!"branch_weights", i32 97, i32 1}  ; 97:1
!10 = !{!"branch_weights", i32 96, i32 2} ; 96:2
    637