; RUN: opt -S -loop-predication -loop-predication-skip-profitability-checks=false < %s 2>&1 | FileCheck %s
; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,require<branch-prob>,loop(loop-predication)' < %s 2>&1 | FileCheck %s

; Tests for loop predication with its profitability checks enabled
; (-loop-predication-skip-profitability-checks=false). Each function has a
; range-check guard in the loop header and a coarse trip-count check in the
; latch; the tests differ only in where the latch exits to and in the attached
; branch weights.

; The latch block exits to a speculation (deopt) block. BPI already knows
; (without profile data) that the deopt exit is very rarely taken, so we do not
; predicate this loop using that coarse latch check.
; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
; CHECK-LABEL: donot_predicate(
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

; The guard must still use the original, un-widened in-loop condition.
; CHECK-LABEL: Header:
; CHECK: %within.bounds = icmp ult i64 %j2, %length.ext
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9)
Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; Branch weights (!0) make the %exit edge overwhelmingly likely.
  br i1 %innercmp, label %Latch, label %exit, !prof !0

Latch:                                            ; preds = %Header
  ; Coarse latch check: leave the loop via %deopt once the counter reaches 1048576.
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %deopt

deopt:                                            ; preds = %Latch
  %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
  ret i64 %counted_speculation_failed

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}
!0 = !{!"branch_weights", i32 18, i32 104200}

; Predicate the loop since there is no profile information and BPI concluded
; that all exiting blocks have the same probability of exiting from the loop.
define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
; The widened condition is expected to be computed in the entry block.
; CHECK-LABEL: predicate(
; CHECK-LABEL: entry:
; CHECK: [[limit_check:[^ ]+]] = icmp ule i64 1048576, %length.ext
; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i64 0, %length.ext
; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

; The guard in the header is expected to use the widened condition.
; CHECK-LABEL: Header:
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; No branch weights here: BPI has only its static heuristics to go on.
  br i1 %innercmp, label %Latch, label %exit

Latch:                                            ; preds = %Header
  ; Same coarse latch check, but the exit is an ordinary block, not a deopt.
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}

; Same as the test above, but with profiling data saying that the most probable
; exit from the loop is the header exiting block (not the latch block). So do
; not predicate.
; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
; CHECK-LABEL: donot_predicate_prof(
; CHECK-LABEL: entry:
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

; The guard must still use the original, un-widened in-loop condition.
; CHECK-LABEL: Header:
; CHECK: %within.bounds = icmp ult i64 %j2, %length.ext
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9)
Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; Branch weights (!1) make the %exit edge overwhelmingly likely.
  br i1 %innercmp, label %Latch, label %exit, !prof !1

Latch:                                            ; preds = %Header
  ; Branch weights (!2) make the latch exit to %exitLatch extremely unlikely.
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}
declare i64 @llvm.experimental.deoptimize.i64(...)
declare void @llvm.experimental.guard(i1, ...)

!1 = !{!"branch_weights", i32 104, i32 1042861}
!2 = !{!"branch_weights", i32 255129, i32 1}