; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16

; We should be able to propagate constant data through comparisons.
; For example, in this test we have a load that becomes constant after
; unrolling, which makes the comparison with 0 known to be false - and that
; triggers further simplifications.
;
; We expect this loop to be unrolled, because the loaded value becomes the
; constant 1 on every iteration, which in turn simplifies the following
; comparison, zero-extension, and addition. In total, unrolling should
; optimize away more than 50% of the instructions in this case.
;
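; A rough sketch of what each unrolled iteration is expected to fold to (this
; is illustrative, not the exact output of the pass):
;   %x1   = 1       ; the load from @known_constant folds to 1
;   %cmp  = false   ; icmp eq 1, 0
;   %cast = 0       ; zext of false
;   %r.1  = %r.0    ; adding 0 is a no-op
; so the function should effectively reduce to returning 0.
;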
; CHECK-LABEL: @const_compare
; CHECK-NOT: br i1 %
; CHECK: ret i32
define i32 @const_compare(i32* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
  %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
  %x1 = load i32, i32* %arrayidx1, align 4
  %cmp = icmp eq i32 %x1, 0
  %cast = zext i1 %cmp to i32
  %iv.1 = add nuw nsw i64 %iv.0, 1
  %r.1 = add i32 %r.0, %cast
  %exitcond = icmp eq i64 %iv.1, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %r.1
}

; If we can figure out the result of the comparison on each iteration, we can
; resolve the dependent branch. That means the unrolled version of the loop
; will have less code, because the not-taken basic blocks are not needed there.
; This test checks that this is taken into consideration.
; We expect this loop to be unrolled, because the most complicated part of its
; body (the if.then block) is never actually executed.
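; A rough sketch of the expected folding on each unrolled iteration (this is
; illustrative, not the exact output of the pass):
;   %x1  = 1       ; the load from @known_constant folds to 1
;   %cmp = false   ; icmp eq 1, 0, so the if.then block is dead
;   %r.1 = 1       ; the phi takes the %for.body value, i.e. %x1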
; CHECK-LABEL: @branch_folded
; CHECK-NOT: br i1 %
; CHECK: ret i32
define i32 @branch_folded(i32* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
  %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
  %x1 = load i32, i32* %arrayidx1, align 4
  %cmp = icmp eq i32 %x1, 0
  %iv.1 = add nuw nsw i64 %iv.0, 1
  br i1 %cmp, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
  %x2 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %x2, %r.0
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %r.1 = phi i32 [ %add, %if.then ], [ %x1, %for.body ]
  %exitcond = icmp eq i64 %iv.1, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i32 %r.1
}

; This test is similar to the previous one, but here we use the IV in the
; comparison (rather than a loaded value, as we did there).
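; After full unrolling, %cmp3 is a known constant on each iteration: it is
; false everywhere except the iteration where %indvars.iv is 5, so every
; conditional branch should fold away and only that one iteration keeps the
; load from %b (a rough expectation, not the exact output of the pass).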
; CHECK-LABEL: @branch_iv
; CHECK-NOT: br i1 %
; CHECK: ret i64
define i64 @branch_iv(i64* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
  %r.030 = phi i64 [ 0, %entry ], [ %r.1, %for.inc ]
  %cmp3 = icmp eq i64 %indvars.iv, 5
  %tmp3 = add nuw nsw i64 %indvars.iv, 1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %tmp3
  %tmp1 = load i64, i64* %arrayidx2, align 4
  %add = add nsw i64 %tmp1, %r.030
  br label %for.inc

for.inc:                                          ; preds = %if.then, %for.body
  %r.1 = phi i64 [ %add, %if.then ], [ %r.030, %for.body ]
  %exitcond = icmp eq i64 %tmp3, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i64 %r.1
}

; Induction variables are often cast to another type, and that shouldn't
; prevent us from folding branches. This test specifically checks that we can
; handle this. Other than that, it's similar to the previous test.
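; As in @branch_iv, the trunc of a known-constant IV value folds to a
; constant, so %cmp3 is known on every iteration and the conditional branches
; are expected to fold away (an illustrative expectation, not the exact output).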
; CHECK-LABEL: @branch_iv_trunc
; CHECK-NOT:   br i1 %
; CHECK:   ret i32
define i32 @branch_iv_trunc(i32* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
  %r.030 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
  %tmp2 = trunc i64 %indvars.iv to i32
  %cmp3 = icmp eq i32 %tmp2, 5
  %tmp3 = add nuw nsw i64 %indvars.iv, 1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %tmp3
  %tmp1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %tmp1, %r.030
  br label %for.inc

for.inc:                                          ; preds = %if.then, %for.body
  %r.1 = phi i32 [ %add, %if.then ], [ %r.030, %for.body ]
  %exitcond = icmp eq i64 %tmp3, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i32 %r.1
}

; Check that we don't crash when we analyze an icmp between a pointer-typed IV
; and a constant pointer.
; CHECK-LABEL: @ptr_cmp_crash
; CHECK:   ret void
define void @ptr_cmp_crash() {
entry:
  br label %while.body

while.body:
  %iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ]
  %iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1
  %exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9)
  br i1 %exitcond, label %loop.exit, label %while.body

loop.exit:
  ret void
}

; Check that we don't crash when we analyze a ptrtoint cast.
; CHECK-LABEL: @ptrtoint_cast_crash
; CHECK:   ret void
define void @ptrtoint_cast_crash(i8* %a) {
entry:
  %limit = getelementptr i8, i8* %a, i64 512
  br label %loop.body

loop.body:
  %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
  %cast = ptrtoint i8* %iv.0 to i64
  %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
  %exitcond = icmp ne i8* %iv.1, %limit
  br i1 %exitcond, label %loop.body, label %loop.exit

loop.exit:
  ret void
}

; The loop unroller should be able to predict that a comparison will become
; constant if the operands are pointers with the same base and constant
; offsets.
; We expect this loop to be unrolled, since most of its instructions would
; become constant after unrolling.
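; A rough sketch of the expected reasoning (illustrative, not the exact output
; of the pass): %iv.0 and %iv2.0 advance in lockstep and always differ by a
; constant offset of 7 bytes, so %cmp folds to false on every iteration; then
; %sub and %mul fold to 0 and %r.1 stays equal to %r.0, i.e. 0.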
; CHECK-LABEL: @ptr_cmp
; CHECK-NOT:   br i1 %
; CHECK:   ret i64
define i64 @ptr_cmp(i8* %a) {
entry:
  %limit = getelementptr i8, i8* %a, i64 40
  %start.iv2 = getelementptr i8, i8* %a, i64 7
  br label %loop.body

loop.body:
  %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
  %iv2.0 = phi i8* [ %start.iv2, %entry ], [ %iv2.1, %loop.body ]
  %r.0 = phi i64 [ 0, %entry ], [ %r.1, %loop.body ]
  %cast = ptrtoint i8* %iv.0 to i64
  %cmp = icmp eq i8* %iv2.0, %iv.0
  %sub = sext i1 %cmp to i64
  %mul = mul i64 %sub, %cast
  %r.1 = add i64 %r.0, %mul
  %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
  %iv2.1 = getelementptr inbounds i8, i8* %iv2.0, i64 1
  %exitcond = icmp ne i8* %iv.1, %limit
  br i1 %exitcond, label %loop.body, label %loop.exit

loop.exit:
  ret i64 %r.1
}