Home | History | Annotate | Download | only in LoopUnroll
      1 ; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll 2>&1 | FileCheck %s
      2 ; RUN: opt < %s -S -debug-only=loop-unroll -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
      3 ; Confirm that peeling is disabled if the number of counts required to reach
      4 ; the hot percentile is above the threshold.
      5 ; RUN: opt < %s -S -profile-summary-huge-working-set-size-threshold=9 -debug-only=loop-unroll -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s --check-prefix=NOPEEL
      6 ; REQUIRES: asserts
      7 
      8 ; Make sure we use the profile information correctly to peel off 3 iterations
      9 ; from the loop, and update the branch weights for the peeled loop properly.
     10 
     11 ; CHECK: Loop Unroll: F[basic]
     12 ; CHECK: PEELING loop %for.body with iteration count 3!
     13 ; CHECK: Loop Unroll: F[optsize]
     14 ; CHECK-NOT: PEELING
     15 
     16 ; Confirm that no peeling occurs when we are performing full unrolling.
     17 ; RUN: opt < %s -S -debug-only=loop-unroll -passes='require<opt-remark-emit>,loop(unroll-full)' 2>&1 | FileCheck %s --check-prefix=NOPEEL
     18 ; NOPEEL-NOT: PEELING
     19 
     20 ; CHECK-LABEL: @basic
     21 ; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !15
     22 ; CHECK: [[NEXT0]]:
     23 ; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !16
     24 ; CHECK: [[NEXT1]]:
     25 ; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !17
     26 ; CHECK: [[NEXT2]]:
     27 ; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18
     28 
     29 define void @basic(i32* %p, i32 %k) #0 !prof !15 { ; writes i to p[i] for i = 0..k-1; attribute group #0 carries no optsize
     30 entry:
     31   %cmp3 = icmp slt i32 0, %k ; loop guard: signed test for k > 0
     32   br i1 %cmp3, label %for.body.lr.ph, label %for.end ; k <= 0 bypasses the loop and goes straight to the return
     33 
     34 for.body.lr.ph:                                   ; preds = %entry
     35   br label %for.body
     36 
     37 for.body:                                         ; preds = %for.body.lr.ph, %for.body
     38   %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ; induction variable i, starts at 0
     39   %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] ; current store address, starts at %p
     40   %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1 ; address of the next i32 element
     41   store i32 %i.05, i32* %p.addr.04, align 4 ; *current = i
     42   %inc = add nsw i32 %i.05, 1 ; i + 1
     43   %cmp = icmp slt i32 %inc, %k ; keep iterating while i + 1 < k (signed)
     44   br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16 ; latch: input !16 weights are 3001 (back edge) vs 1001 (exit)
     45 
     46 for.cond.for.end_crit_edge:                       ; preds = %for.body
     47   br label %for.end
     48 
     49 for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
     50   ret void
     51 }
     52 
     53 ; We don't want to peel loops when optimizing for size.
     54 ; CHECK-LABEL: @optsize
     55 ; CHECK: for.body.lr.ph:
     56 ; CHECK-NEXT: br label %for.body
     57 ; CHECK: for.body:
     58 ; CHECK-NOT: br
     59 ; CHECK: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
     60 define void @optsize(i32* %p, i32 %k) #1 !prof !15 { ; same body as @basic, but attribute group #1 adds optsize
     61 entry:
     62   %cmp3 = icmp slt i32 0, %k ; loop guard: signed test for k > 0
     63   br i1 %cmp3, label %for.body.lr.ph, label %for.end ; k <= 0 bypasses the loop and goes straight to the return
     64 
     65 for.body.lr.ph:                                   ; preds = %entry
     66   br label %for.body
     67 
     68 for.body:                                         ; preds = %for.body.lr.ph, %for.body
     69   %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ; induction variable i, starts at 0
     70   %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] ; current store address, starts at %p
     71   %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1 ; address of the next i32 element
     72   store i32 %i.05, i32* %p.addr.04, align 4 ; *current = i
     73   %inc = add nsw i32 %i.05, 1 ; i + 1
     74   %cmp = icmp slt i32 %inc, %k ; keep iterating while i + 1 < k (signed)
     75   br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16 ; latch: same input !16 weights as @basic (3001 back edge, 1001 exit)
     76 
     77 for.cond.for.end_crit_edge:                       ; preds = %for.body
     78   br label %for.end
     79 
     80 for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
     81   ret void
     82 }
     83 
     84 attributes #0 = { nounwind } ; used by @basic
     85 attributes #1 = { nounwind optsize } ; used by @optsize; differs from #0 only by optsize
     86 
     87 !llvm.module.flags = !{!1}
     88 
     89 !1 = !{i32 1, !"ProfileSummary", !2} ; module flag that attaches the profile summary in !2
     90 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
     91 !3 = !{!"ProfileFormat", !"InstrProf"}
     92 !4 = !{!"TotalCount", i64 10}
     93 !5 = !{!"MaxCount", i64 3}
     94 !6 = !{!"MaxInternalCount", i64 1}
     95 !7 = !{!"MaxFunctionCount", i64 3}
     96 !8 = !{!"NumCounts", i64 2}
     97 !9 = !{!"NumFunctions", i64 2}
     98 !10 = !{!"DetailedSummary", !11}
     99 !11 = !{!12, !13, !14} ; NOTE(review): entries below look like (cutoff, min count, number of counts) - confirm against the LLVM profile summary format
    100 !12 = !{i32 10000, i64 3, i32 2}
    101 !13 = !{i32 999000, i64 1, i32 10} ; NOTE(review): last field 10 is presumably what exceeds the threshold=9 passed in one RUN line - confirm
    102 !14 = !{i32 999999, i64 1, i32 10}
    103 !15 = !{!"function_entry_count", i64 1} ; referenced via !prof on both @basic and @optsize
    104 !16 = !{!"branch_weights", i32 3001, i32 1001} ; referenced by the loop latch branch in both functions
    105 
    106 ;CHECK: !15 = !{!"branch_weights", i32 900, i32 101}
    107 ;CHECK: !16 = !{!"branch_weights", i32 540, i32 360}
    108 ;CHECK: !17 = !{!"branch_weights", i32 162, i32 378}
    109 ;CHECK: !18 = !{!"branch_weights", i32 1399, i32 162}
    110 
    111