; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O2 %s
; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O3 %s

; Block layout / tail-duplication tests for PowerPC64LE.
; Each function below is a small control-flow graph annotated with branch
; weights; the FileCheck directives in front of it pin the order in which the
; basic blocks are emitted and, where relevant, which test block was copied
; into the end of a predecessor. The file is compiled twice (at -O2 and -O3);
; directives with the plain prefix apply to both runs, the -O2 / -O3 suffixed
; prefixes apply to one run only.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; Intended layout:
; The chain-based outlining produces the layout
; test1
; test2
; test3
; test4
; optional1
; optional2
; optional3
; optional4
; exit
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: straight_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
; Use the register captured from the move above (not a fresh capture) so the
; first test is tied to the saved copy of the tag, as in straight_test_50.
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT4LABEL]]:
;CHECK: b .[[EXITLABEL]]

define void @straight_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-of-triangles based duplicating produces the layout
; test1
; test2
; test3
; optional1
; optional2
; optional3
; exit
; even for 50/50 branches.
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
; NOTE(review): the branch in test3 below carries !1 (5 to 3), not the balanced
; !2 used by test1 and test2 - confirm that this is intentional.
;CHECK-LABEL: straight_test_50:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: b .[[EXITLABEL]]

define void @straight_test_50(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
optional1:
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
optional2:
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
optional3:
  call void @c()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-of-triangles based duplicating produces the layout when 3
; instructions are allowed for tail-duplication.
; test1
; test2
; test3
; optional1
; optional2
; optional3
; exit
;
; Otherwise it produces the layout:
; test1
; optional1
; test2
; optional2
; test3
; optional3
; exit
;
; The first layout is what the -O3 run is checked against and the second is
; what the -O2 run is checked against.
; NOTE(review): the branch in test3 below carries !1 (5 to 3), not the balanced
; !2 used by test1 and test2 - confirm that this is intentional.

;CHECK-LABEL: straight_test_3_instr_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
;CHECK-NEXT: cmplwi {{[0-9]+}}, 2

;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: # %test2
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK-O3: blr
;CHECK-O3-NEXT: .[[OPT1LABEL]]:
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-O3-NEXT: .[[OPT2LABEL]]:
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-O3-NEXT: .[[OPT3LABEL]]:
;CHECK-O3: b .[[EXITLABEL]]

;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional1
;CHECK-O2: .[[TEST2LABEL]]: # %test2
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional2
;CHECK-O2: .[[TEST3LABEL]]: # %test3
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional3
; Use the label captured from the branch above (not a fresh capture) so the
; branch target is verified to be the exit block, like the two labels before it.
;CHECK-O2: .[[EXITLABEL]]: # %exit
;CHECK-O2: blr


define void @straight_test_3_instr_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 3
  %tagbit1eq0 = icmp eq i32 %tagbit1, 2
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
optional1:
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 12
  %tagbit2eq0 = icmp eq i32 %tagbit2, 8
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
optional2:
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 48
  %tagbit3eq0 = icmp eq i32 %tagbit3, 32
  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
optional3:
  call void @c()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-based outlining produces the layout
; entry
; --- Begin loop ---
; for.latch
; for.check
; test1
; test2
; test3
; test4
; optional1
; optional2
; optional3
; optional4
; --- End loop ---
; exit
; The CHECK statements check for the whole string of tests and exit block,
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
; The position of the for.check label relative to the load differs between the
; two runs, hence the separate -O2 / -O3 directives for it.
;CHECK-LABEL: loop_test:
;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
;CHECK: addi
;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: # %bb.{{[0-9]+}}: # %test1
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
;CHECK: [[OPT1LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK: [[OPT4LABEL]]:
;CHECK: b .[[LATCHLABEL]]
define void @loop_test(i32* %tags, i32 %count) {
entry:
  br label %for.check
for.check:
  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
  %done.count = icmp ugt i32 %count.loop, 0
  ; NOTE(review): the address is indexed by %count, not %count.loop, so it is
  ; the same on every iteration. The expected assembly above was written
  ; against this form - confirm it is intentional before changing it.
  %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
  %tag = load i32, i32* %tag_ptr
  %done.tag = icmp eq i32 %tag, 0
  %done = and i1 %done.count, %done.tag
  br i1 %done, label %test1, label %exit, !prof !1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %for.latch
for.latch:
  %count.sub = sub i32 %count.loop, 1
  br label %for.check
exit:
  ret void
}

; The block then2 is not unavoidable, meaning it does not dominate the exit.
; But since it can be tail-duplicated, it should be placed as a fallthrough from
; test2 and copied. The purpose here is to make sure that the tail-duplication
; code is independent of the outlining code, which works by choosing the
; "unavoidable" blocks.
; CHECK-LABEL: avoidable_test:
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: andi.
; CHECK: # %bb.{{[0-9]+}}: # %test2
; Make sure then2 falls through from test2
; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
; CHECK: # %bb.{{[0-9]+}}: # %then2
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
; CHECK: # %else1
; CHECK: bl a
; CHECK: bl a
; Make sure then2 was copied into else1
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
; CHECK: # %end1
; CHECK: bl d
; CHECK: # %else2
; CHECK: bl c
; CHECK: # %end2
define void @avoidable_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
else1:
  call void @a()
  call void @a()
  br label %then2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
then2:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
else2:
  call void @c()
  br label %end2
end2:
  ret void
end1:
  call void @d()
  ret void
}

; CHECK-LABEL: trellis_test:
; The number in the block labels is the expected block frequency given the
; probabilities annotated. There is a conflict in the b;c->d;e trellis that
; should be resolved as c->e;b->d.
; The d;e->f;g trellis should be resolved as e->g;d->f.
; The f;g->h;i trellis should be resolved as f->i;g->h.
; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
; h->j->ret
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %c10
; CHECK: # %e9
; CHECK: # %g10
; CHECK: # %h10
; CHECK: # %j8
; CHECK: # %ret
; CHECK: # %b6
; CHECK: # %d7
; CHECK: # %f6
; CHECK: # %i6
define void @trellis_test(i32 %tag) {
entry:
  br label %a16
a16:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
c10:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  ; Both of these edges should be hotter than the other incoming edge
  ; for e9 or d7
  br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
e9:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
g10:
  call void @g()
  call void @g()
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
i6:
  call void @i()
  call void @i()
  %tagbits.i = and i32 %tag, 768
  %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
  br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
b6:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
d7:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
f6:
  call void @f()
  call void @f()
  %tagbits.f = and i32 %tag, 192
  %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
  br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
h10:
  call void @h()
  call void @h()
  %tagbits.h = and i32 %tag, 768
  %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
  br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
j8:
  call void @j()
  call void @j()
  br label %ret
ret:
  ret void
}

; Verify that we still consider tail-duplication opportunities if we find a
; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
; of both F and G. The basic trellis algorithm picks the F->G edge, but after
; checking, it's profitable to duplicate G into F. The weights here are not
; really important. They are there to help make the test stable.
; CHECK-LABEL: trellis_then_dup_test:
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %b
; CHECK: # %d
; CHECK: # %g
; CHECK: # %ret1
; CHECK: # %c
; CHECK: # %e
; CHECK: # %f
; CHECK: # %ret2
; CHECK: # %ret
define void @trellis_then_dup_test(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
d:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
f:
  call void @f()
  call void @f()
  br label %g
g:
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
e:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
ret1:
  call void @a()
  br label %ret
ret2:
  call void @b()
  br label %ret
ret:
  ret void
}

; Verify that we do not misidentify a trellis as a triangle trellis when it is
; not really a triangle.
; CHECK-LABEL: trellis_no_triangle:
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %b
; CHECK: # %d
; CHECK: # %ret
; CHECK: # %c
; CHECK: # %e
define void @trellis_no_triangle(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
d:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
e:
  call void @e()
  call void @e()
  br label %ret
ret:
  call void @f()
  ret void
}

; External callees; the calls to them give each block a body that can be
; recognised in the generated assembly.
declare void @a()
declare void @b()
declare void @c()
declare void @d()
declare void @e()
declare void @f()
declare void @g()
declare void @h()
declare void @i()
declare void @j()

; Branch-weight profiles referenced by the !prof attachments above. The first
; weight belongs to the first (true) successor of the branch.
!1 = !{!"branch_weights", i32 5, i32 3}
!2 = !{!"branch_weights", i32 50, i32 50}
!3 = !{!"branch_weights", i32 6, i32 4}
!4 = !{!"branch_weights", i32 7, i32 2}
!5 = !{!"branch_weights", i32 2, i32 8}
!6 = !{!"branch_weights", i32 3, i32 4}
!7 = !{!"branch_weights", i32 4, i32 2}
!8 = !{!"branch_weights", i32 98, i32 2}
!9 = !{!"branch_weights", i32 97, i32 1}
!10 = !{!"branch_weights", i32 96, i32 2}