Home | History | Annotate | Download | only in LoopUnrollAndJam
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
      3 
      4 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
      5 
      6 ; CHECK-LABEL: test1
      7 ; Tests for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; }
      8 ; CHECK-NEXT:  entry:
      9 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[J:%.*]], 0
     10 ; CHECK-NEXT:    [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0
     11 ; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]]
     12 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]]
     13 ; CHECK:       for.outer.preheader:
     14 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[I]], -1
     15 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
     16 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
     17 ; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]
     18 ; CHECK:       for.outer.preheader.new:
     19 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
     20 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
     21 ; CHECK:       for.outer:
     22 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
     23 ; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
     24 ; CHECK-NEXT:    [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1
     25 ; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
     26 ; CHECK-NEXT:    [[ADD8_1:%.*]] = add nuw nsw i32 [[ADD8]], 1
     27 ; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
     28 ; CHECK-NEXT:    [[ADD8_2:%.*]] = add nuw nsw i32 [[ADD8_1]], 1
     29 ; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
     30 ; CHECK-NEXT:    [[ADD8_3]] = add nuw i32 [[ADD8_2]], 1
     31 ; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
     32 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
     33 ; CHECK:       for.inner:
     34 ; CHECK-NEXT:    [[J_0:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
     35 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
     36 ; CHECK-NEXT:    [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
     37 ; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ]
     38 ; CHECK-NEXT:    [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
     39 ; CHECK-NEXT:    [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ]
     40 ; CHECK-NEXT:    [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
     41 ; CHECK-NEXT:    [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ]
     42 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[J_0]]
     43 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !tbaa !0
     44 ; CHECK-NEXT:    [[ADD]] = add i32 [[TMP2]], [[SUM]]
     45 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_0]], 1
     46 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_1]]
     47 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !tbaa !0
     48 ; CHECK-NEXT:    [[ADD_1]] = add i32 [[TMP3]], [[SUM_1]]
     49 ; CHECK-NEXT:    [[INC_1]] = add nuw i32 [[J_1]], 1
     50 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_2]]
     51 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !tbaa !0
     52 ; CHECK-NEXT:    [[ADD_2]] = add i32 [[TMP4]], [[SUM_2]]
     53 ; CHECK-NEXT:    [[INC_2]] = add nuw i32 [[J_2]], 1
     54 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_3]]
     55 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !tbaa !0
     56 ; CHECK-NEXT:    [[ADD_3]] = add i32 [[TMP5]], [[SUM_3]]
     57 ; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[J_3]], 1
     58 ; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[J]]
     59 ; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
     60 ; CHECK:       for.latch:
     61 ; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
     62 ; CHECK-NEXT:    [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ]
     63 ; CHECK-NEXT:    [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ]
     64 ; CHECK-NEXT:    [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ]
     65 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I]]
     66 ; CHECK-NEXT:    store i32 [[ADD_LCSSA]], i32* [[ARRAYIDX6]], align 4, !tbaa !0
     67 ; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8]]
     68 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_1]], i32* [[ARRAYIDX6_1]], align 4, !tbaa !0
     69 ; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_1]]
     70 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], i32* [[ARRAYIDX6_2]], align 4, !tbaa !0
     71 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_2]]
     72 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], i32* [[ARRAYIDX6_3]], align 4, !tbaa !0
     73 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
     74 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop !4
     75 ; CHECK:       for.end.loopexit.unr-lcssa.loopexit:
     76 ; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ]
     77 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
     78 ; CHECK:       for.end.loopexit.unr-lcssa:
     79 ; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
     80 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
     81 ; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
     82 ; CHECK:       for.outer.epil.preheader:
     83 ; CHECK-NEXT:    br label [[FOR_OUTER_EPIL:%.*]]
     84 ; CHECK:       for.outer.epil:
     85 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL:%.*]]
     86 ; CHECK:       for.inner.epil:
     87 ; CHECK-NEXT:    [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
     88 ; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
     89 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL]]
     90 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4, !tbaa !0
     91 ; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[TMP6]], [[SUM_EPIL]]
     92 ; CHECK-NEXT:    [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
     93 ; CHECK-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[J]]
     94 ; CHECK-NEXT:    br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
     95 ; CHECK:       for.latch.epil:
     96 ; CHECK-NEXT:    [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ]
     97 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_UNR]]
     98 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL]], i32* [[ARRAYIDX6_EPIL]], align 4, !tbaa !0
     99 ; CHECK-NEXT:    [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
    100 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
    101 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
    102 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
    103 ; CHECK:       for.end.loopexit.epilog-lcssa:
    104 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
    105 ; CHECK:       for.end.loopexit:
    106 ; CHECK-NEXT:    br label [[FOR_END]]
    107 ; CHECK:       for.end:
    108 ; CHECK-NEXT:    ret void
    109 ; CHECK:       for.outer.epil.1:
    110 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_1:%.*]]
    111 ; CHECK:       for.inner.epil.1:
    112 ; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
    113 ; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
    114 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL_1]]
    115 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa !0
    116 ; CHECK-NEXT:    [[ADD_EPIL_1]] = add i32 [[TMP7]], [[SUM_EPIL_1]]
    117 ; CHECK-NEXT:    [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
    118 ; CHECK-NEXT:    [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[J]]
    119 ; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
    120 ; CHECK:       for.latch.epil.1:
    121 ; CHECK-NEXT:    [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
    122 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL]]
    123 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_1]], i32* [[ARRAYIDX6_EPIL_1]], align 4, !tbaa !0
    124 ; CHECK-NEXT:    [[ADD8_EPIL_1:%.*]] = add nuw i32 [[ADD8_EPIL]], 1
    125 ; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
    126 ; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
    127 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
    128 ; CHECK:       for.outer.epil.2:
    129 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_2:%.*]]
    130 ; CHECK:       for.inner.epil.2:
    131 ; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
    132 ; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
    133 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL_2]]
    134 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4, !tbaa !0
    135 ; CHECK-NEXT:    [[ADD_EPIL_2]] = add i32 [[TMP8]], [[SUM_EPIL_2]]
    136 ; CHECK-NEXT:    [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
    137 ; CHECK-NEXT:    [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[J]]
    138 ; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
    139 ; CHECK:       for.latch.epil.2:
    140 ; CHECK-NEXT:    [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
    141 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL_1]]
    142 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX6_EPIL_2]], align 4, !tbaa !0
    143 ; CHECK-NEXT:    [[ADD8_EPIL_2:%.*]] = add nuw i32 [[ADD8_EPIL_1]], 1
    144 ; CHECK-NEXT:    [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1
    145 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
    146 define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { ; for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; } with outer bound %I and inner bound %J
    147 entry:
    148   %cmp = icmp ne i32 %J, 0 ; inner bound %J is non-zero
    149   %cmpJ = icmp ne i32 %I, 0 ; NOTE: despite its name, this tests the outer bound %I
    150   %or.cond = and i1 %cmp, %cmpJ
    151   br i1 %or.cond, label %for.outer.preheader, label %for.end ; skip the whole nest unless both bounds are non-zero
    152 
    153 for.outer.preheader:
    154   br label %for.outer
    155 
    156 for.outer: ; outer loop header: contains only the induction phi
    157   %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ] ; outer induction variable, starts at 0
    158   br label %for.inner
    159 
    160 for.inner: ; single-block inner loop
    161   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] ; inner induction variable, reset to 0 per outer iteration
    162   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] ; running sum, reset to 0 per outer iteration
    163   %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
    164   %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 ; B[j]
    165   %add = add i32 %0, %sum ; sum += B[j]
    166   %inc = add nuw i32 %j, 1
    167   %exitcond = icmp eq i32 %inc, %J
    168   br i1 %exitcond, label %for.latch, label %for.inner ; leave the inner loop once j+1 == J
    169 
    170 for.latch: ; outer loop latch: stores the reduction result
    171   %add.lcssa = phi i32 [ %add, %for.inner ] ; single-entry phi carrying the final inner-loop sum
    172   %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
    173   store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5 ; A[i] = sum
    174   %add8 = add nuw i32 %i, 1
    175   %exitcond25 = icmp eq i32 %add8, %I
    176   br i1 %exitcond25, label %for.end.loopexit, label %for.outer ; leave the outer loop once i+1 == I
    177 
    178 for.end.loopexit:
    179   br label %for.end
    180 
    181 for.end:
    182   ret void
    183 }
    184 
    185 
    186 ; CHECK-LABEL: test2
    187 ; Tests for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; }
    188 ; A[i] load/store dependency should not block unroll-and-jam
    189 ; CHECK: for.outer:
    190 ; CHECK:   %i = phi i32 [ %add9.3, %for.latch ], [ 0, %for.outer.preheader.new ]
    191 ; CHECK:   %niter = phi i32 [ %unroll_iter, %for.outer.preheader.new ], [ %niter.nsub.3, %for.latch ]
    192 ; CHECK:   br label %for.inner
    193 ; CHECK: for.inner:
    194 ; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
    195 ; CHECK:   %sum = phi i32 [ %2, %for.outer ], [ %add, %for.inner ]
    196 ; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
    197 ; CHECK:   %sum.1 = phi i32 [ %3, %for.outer ], [ %add.1, %for.inner ]
    198 ; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
    199 ; CHECK:   %sum.2 = phi i32 [ %4, %for.outer ], [ %add.2, %for.inner ]
    200 ; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
    201 ; CHECK:   %sum.3 = phi i32 [ %5, %for.outer ], [ %add.3, %for.inner ]
    202 ; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
    203 ; CHECK: for.latch:
    204 ; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
    205 ; CHECK:   %add.lcssa.1 = phi i32 [ %add.1, %for.inner ]
    206 ; CHECK:   %add.lcssa.2 = phi i32 [ %add.2, %for.inner ]
    207 ; CHECK:   %add.lcssa.3 = phi i32 [ %add.3, %for.inner ]
    208 ; CHECK:   br i1 %niter.ncmp.3, label %for.end10.loopexit.unr-lcssa.loopexit, label %for.outer
    209 ; CHECK: for.end10.loopexit.unr-lcssa.loopexit:
    210 define void @test2(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { ; for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; } with outer bound %I and inner bound %J
    211 entry:
    212   %cmp = icmp ne i32 %J, 0 ; inner bound %J is non-zero
    213   %cmp125 = icmp ne i32 %I, 0 ; outer bound %I is non-zero
    214   %or.cond = and i1 %cmp, %cmp125
    215   br i1 %or.cond, label %for.outer.preheader, label %for.end10 ; skip the whole nest unless both bounds are non-zero
    216 
    217 for.outer.preheader:
    218   br label %for.outer
    219 
    220 for.outer: ; outer loop header: reads A[i] before entering the inner loop
    221   %i = phi i32 [ %add9, %for.latch ], [ 0, %for.outer.preheader ] ; outer induction variable, starts at 0
    222   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i ; &A[i], reused by the store in for.latch
    223   %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 ; initial value of the running sum
    224   br label %for.inner
    225 
    226 for.inner: ; single-block inner loop
    227   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] ; inner induction variable
    228   %sum = phi i32 [ %0, %for.outer ], [ %add, %for.inner ] ; running sum, seeded with the loaded A[i]
    229   %arrayidx6 = getelementptr inbounds i32, i32* %B, i32 %j
    230   %1 = load i32, i32* %arrayidx6, align 4, !tbaa !5 ; B[j]
    231   %add = add i32 %1, %sum ; sum += B[j]
    232   %inc = add nuw i32 %j, 1
    233   %exitcond = icmp eq i32 %inc, %J
    234   br i1 %exitcond, label %for.latch, label %for.inner ; leave the inner loop once j+1 == J
    235 
    236 for.latch: ; outer loop latch: writes the sum back to the address loaded in for.outer
    237   %add.lcssa = phi i32 [ %add, %for.inner ] ; single-entry phi carrying the final inner-loop sum
    238   store i32 %add.lcssa, i32* %arrayidx, align 4, !tbaa !5 ; A[i] = sum (same pointer as the load above)
    239   %add9 = add nuw i32 %i, 1
    240   %exitcond28 = icmp eq i32 %add9, %I
    241   br i1 %exitcond28, label %for.end10.loopexit, label %for.outer ; leave the outer loop once i+1 == I
    242 
    243 for.end10.loopexit:
    244   br label %for.end10
    245 
    246 for.end10:
    247   ret void
    248 }
    249 
    250 
    251 ; CHECK-LABEL: test3
    252 ; Tests Complete unroll-and-jam of the outer loop
    253 ; CHECK: for.outer:
    254 ; CHECK:   br label %for.inner
    255 ; CHECK: for.inner:
    256 ; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
    257 ; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
    258 ; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
    259 ; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add.1, %for.inner ]
    260 ; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
    261 ; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add.2, %for.inner ]
    262 ; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
    263 ; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add.3, %for.inner ]
    264 ; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
    265 ; CHECK: for.latch:
    266 ; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
    267 ; CHECK:   %add.lcssa.1 = phi i32 [ %add.1, %for.inner ]
    268 ; CHECK:   %add.lcssa.2 = phi i32 [ %add.2, %for.inner ]
    269 ; CHECK:   %add.lcssa.3 = phi i32 [ %add.3, %for.inner ]
    270 ; CHECK:   br label %for.end
    271 ; CHECK: for.end:
    272 define void @test3(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { ; outer loop has a constant exit bound of 4; %I is not used in the body
    273 entry:
    274   %cmp = icmp eq i32 %J, 0
    275   br i1 %cmp, label %for.end, label %for.preheader ; skip the nest when the inner bound %J is zero
    276 
    277 for.preheader:
    278   br label %for.outer
    279 
    280 for.outer: ; outer loop header: contains only the induction phi
    281   %i = phi i32 [ %add8, %for.latch ], [ 0, %for.preheader ] ; outer induction variable, starts at 0
    282   br label %for.inner
    283 
    284 for.inner: ; single-block inner loop
    285   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] ; inner induction variable
    286   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] ; running value, reset to 0 per outer iteration
    287   %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
    288   %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 ; B[j]
    289   %sub = add i32 %sum, 10 ; NOTE: named %sub but is an add: sum + 10
    290   %add = sub i32 %sub, %0 ; NOTE: named %add but is a sub: (sum + 10) - B[j]
    291   %inc = add nuw i32 %j, 1
    292   %exitcond = icmp eq i32 %inc, %J
    293   br i1 %exitcond, label %for.latch, label %for.inner ; leave the inner loop once j+1 == J
    294 
    295 for.latch: ; outer loop latch: uses %add directly, without a phi in this block
    296   %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
    297   store i32 %add, i32* %arrayidx6, align 4, !tbaa !5 ; A[i] = final inner-loop value
    298   %add8 = add nuw nsw i32 %i, 1
    299   %exitcond23 = icmp eq i32 %add8, 4 ; constant bound: the outer loop runs for i = 0..3
    300   br i1 %exitcond23, label %for.end, label %for.outer
    301 
    302 for.end:
    303   ret void
    304 }
    305 
    306 
    307 ; CHECK-LABEL: test4
    308 ; Tests Complete unroll-and-jam with a trip count of 1
    309 ; CHECK: for.outer:
    310 ; CHECK:   br label %for.inner
    311 ; CHECK: for.inner:
    312 ; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
    313 ; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
    314 ; CHECK:   br i1 %exitcond, label %for.latch, label %for.inner
    315 ; CHECK: for.latch:
    316 ; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
    317 ; CHECK:   br label %for.end
    318 ; CHECK: for.end:
    319 define void @test4(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { ; same body as test3 but the outer loop exits after a single iteration; %I is not used in the body
    320 entry:
    321   %cmp = icmp eq i32 %J, 0
    322   br i1 %cmp, label %for.end, label %for.preheader ; skip the nest when the inner bound %J is zero
    323 
    324 for.preheader:
    325   br label %for.outer
    326 
    327 for.outer: ; outer loop header: contains only the induction phi
    328   %i = phi i32 [ %add8, %for.latch ], [ 0, %for.preheader ] ; outer induction variable, starts at 0
    329   br label %for.inner
    330 
    331 for.inner: ; single-block inner loop
    332   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] ; inner induction variable
    333   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] ; running value, reset to 0 per outer iteration
    334   %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
    335   %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 ; B[j]
    336   %sub = add i32 %sum, 10 ; NOTE: named %sub but is an add: sum + 10
    337   %add = sub i32 %sub, %0 ; NOTE: named %add but is a sub: (sum + 10) - B[j]
    338   %inc = add nuw i32 %j, 1
    339   %exitcond = icmp eq i32 %inc, %J
    340   br i1 %exitcond, label %for.latch, label %for.inner ; leave the inner loop once j+1 == J
    341 
    342 for.latch: ; outer loop latch: uses %add directly, without a phi in this block
    343   %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
    344   store i32 %add, i32* %arrayidx6, align 4, !tbaa !5 ; A[i] = final inner-loop value
    345   %add8 = add nuw nsw i32 %i, 1
    346   %exitcond23 = icmp eq i32 %add8, 1 ; constant bound: true on the first pass (i = 0), so the outer loop runs once
    347   br i1 %exitcond23, label %for.end, label %for.outer
    348 
    349 for.end:
    350   ret void
    351 }
    352 
    353 
    354 ; CHECK-LABEL: test5
    355 ; Multiple SubLoopBlocks
    356 ; CHECK: for.outer:
    357 ; CHECK:   br label %for.inner
    358 ; CHECK: for.inner:
    359 ; CHECK:   %inc8.sink15 = phi i32 [ 0, %for.outer ], [ %inc8, %for.inc.1 ]
    360 ; CHECK:   %inc8.sink15.1 = phi i32 [ 0, %for.outer ], [ %inc8.1, %for.inc.1 ]
    361 ; CHECK:   br label %for.inner2
    362 ; CHECK: for.inner2:
    363 ; CHECK:   br i1 %tobool, label %for.cond4, label %for.inc
    364 ; CHECK: for.cond4:
    365 ; CHECK:   br i1 %tobool.1, label %for.cond4a, label %for.inc
    366 ; CHECK: for.cond4a:
    367 ; CHECK:   br label %for.inc
    368 ; CHECK: for.inc:
    369 ; CHECK:   br i1 %tobool.11, label %for.cond4.1, label %for.inc.1
    370 ; CHECK: for.latch:
    371 ; CHECK:   br label %for.end
    372 ; CHECK: for.end:
    373 ; CHECK:   ret i32 0
    374 ; CHECK: for.cond4.1:
    375 ; CHECK:   br i1 %tobool.1.1, label %for.cond4a.1, label %for.inc.1
    376 ; CHECK: for.cond4a.1:
    377 ; CHECK:   br label %for.inc.1
    378 ; CHECK: for.inc.1:
    379 ; CHECK:   br i1 %exitcond.1, label %for.latch, label %for.inner
    380 @a = hidden global [1 x i32] zeroinitializer, align 4 ; one-element i32 array read by @test5
    381 define i32 @test5() #0 { ; loop nest whose inner loop body spans several basic blocks; always returns 0
    382 entry:
    383   br label %for.outer
    384 
    385 for.outer: ; outer loop header
    386   %.sink16 = phi i32 [ 0, %entry ], [ %add, %for.latch ] ; outer counter, starts at 0 and is updated in for.latch
    387   br label %for.inner
    388 
    389 for.inner: ; inner loop header
    390   %inc8.sink15 = phi i32 [ 0, %for.outer ], [ %inc8, %for.inc ] ; inner counter, reset to 0 per outer iteration
    391   br label %for.inner2
    392 
    393 for.inner2:
    394   %l1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0), align 4 ; loads element 0 of @a
    395   %tobool = icmp eq i32 %l1, 0
    396   br i1 %tobool, label %for.cond4, label %for.inc
    397 
    398 for.cond4:
    399   %l0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 1, i32 0), align 4 ; NOTE(review): first GEP index is 1, i.e. one whole [1 x i32] past @a - confirm this is intended
    400   %tobool.1 = icmp eq i32 %l0, 0
    401   br i1 %tobool.1, label %for.cond4a, label %for.inc
    402 
    403 for.cond4a:
    404   br label %for.inc
    405 
    406 for.inc: ; inner loop latch: join point of the three paths through the inner body
    407   %l2 = phi i32 [ 0, %for.inner2 ], [ 1, %for.cond4 ], [ 2, %for.cond4a ] ; 0/1/2 depending on which predecessor reached this block
    408   %inc8 = add nuw nsw i32 %inc8.sink15, 1
    409   %exitcond = icmp eq i32 %inc8, 3 ; constant bound: inner loop runs 3 times
    410   br i1 %exitcond, label %for.latch, label %for.inner
    411 
    412 for.latch: ; outer loop latch
    413   %.lcssa = phi i32 [ %l2, %for.inc ] ; single-entry phi for the last %l2 value
    414   %conv11 = and i32 %.sink16, 255 ; keep the low 8 bits of the outer counter
    415   %add = add nuw nsw i32 %conv11, 4 ; step the outer counter by 4
    416   %cmp = icmp eq i32 %add, 8 ; counter goes 0 -> 4 -> 8, so the outer loop runs twice
    417   br i1 %cmp, label %for.end, label %for.outer
    418 
    419 for.end:
    420   %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ] ; not used by the return below
    421   ret i32 0
    422 }
    423 
    424 
    425 ; CHECK-LABEL: test6
    426 ; Test odd uses of phi nodes
    427 ; CHECK: for.outer:
    428 ; CHECK:   br label %for.inner
    429 ; CHECK: for.inner:
    430 ; CHECK:   br i1 %exitcond.3, label %for.inner, label %for.latch
    431 ; CHECK: for.latch:
    432 ; CHECK:   br label %for.end
    433 ; CHECK: for.end:
    434 ; CHECK:   ret i32 0
    435 @f = hidden global i32 0, align 4 ; global loaded once in @test6's entry block
    436 define i32 @test6() #0 { ; loop nest with phis that take constants on their back edges; always returns 0
    437 entry:
    438   %f.promoted10 = load i32, i32* @f, align 4, !tbaa !5 ; value of @f read before the loops
    439   br label %for.outer
    440 
    441 for.outer: ; outer loop header
    442   %p0 = phi i32 [ %f.promoted10, %entry ], [ 2, %for.latch ] ; loaded @f on entry, constant 2 on every later outer iteration
    443   %inc5.sink9 = phi i32 [ 2, %entry ], [ %inc5, %for.latch ] ; outer counter, starts at 2
    444   br label %for.inner
    445 
    446 for.inner: ; single-block inner loop
    447   %p1 = phi i32 [ %p0, %for.outer ], [ 2, %for.inner ] ; %p0 on entry, constant 2 on the inner back edge
    448   %inc.sink8 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] ; inner counter, starts at 0
    449   %inc = add nuw nsw i32 %inc.sink8, 1
    450   %exitcond = icmp ne i32 %inc, 7 ; inverted sense: true means keep looping
    451   br i1 %exitcond, label %for.inner, label %for.latch ; back edge is the true successor; exits once %inc == 7
    452 
    453 for.latch: ; outer loop latch
    454   %.lcssa = phi i32 [ %p1, %for.inner ] ; single-entry phi for the last %p1 value
    455   %inc5 = add nuw nsw i32 %inc5.sink9, 1
    456   %exitcond11 = icmp ne i32 %inc5, 7 ; inverted sense: true means keep looping
    457   br i1 %exitcond11, label %for.outer, label %for.end ; exits once %inc5 == 7 (counter started at 2)
    458 
    459 for.end:
    460   %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ] ; not used by the return below
    461   %inc.lcssa.lcssa = phi i32 [ 7, %for.latch ] ; constant-valued phi, not used by the return below
    462   ret i32 0
    463 }
    464 
    465 
    466 ; CHECK-LABEL: test7
    467 ; Has a positive dependency between two stores. Still valid.
    468 ; The negative dependency is in unroll-and-jam-disabled.ll
    469 ; CHECK: for.outer:
    470 ; CHECK:   %i = phi i32 [ %add.3, %for.latch ], [ 0, %for.preheader.new ]
    471 ; CHECK:   %niter = phi i32 [ %unroll_iter, %for.preheader.new ], [ %niter.nsub.3, %for.latch ]
    472 ; CHECK:   br label %for.inner
    473 ; CHECK: for.latch:
    474 ; CHECK:   %add9.lcssa = phi i32 [ %add9, %for.inner ]
    475 ; CHECK:   %add9.lcssa.1 = phi i32 [ %add9.1, %for.inner ]
    476 ; CHECK:   %add9.lcssa.2 = phi i32 [ %add9.2, %for.inner ]
    477 ; CHECK:   %add9.lcssa.3 = phi i32 [ %add9.3, %for.inner ]
    478 ; CHECK:   br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer
    479 ; CHECK: for.inner:
    480 ; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ]
    481 ; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ]
    482 ; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add9.1, %for.inner ]
    483 ; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %add10.1, %for.inner ]
    484 ; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add9.2, %for.inner ]
    485 ; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %add10.2, %for.inner ]
    486 ; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add9.3, %for.inner ]
    487 ; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %add10.3, %for.inner ]
    488 ; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
    489 ; CHECK: for.end.loopexit.unr-lcssa.loopexit:
    490 define void @test7(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { ; outer header stores to A[i] and A[i+1]; latch stores the inner-loop sum to A[i]
    491 entry:
    492   %cmp = icmp ne i32 %J, 0 ; inner bound %J is non-zero
    493   %cmp128 = icmp ne i32 %I, 0 ; outer bound %I is non-zero
    494   %or.cond = and i1 %cmp128, %cmp
    495   br i1 %or.cond, label %for.preheader, label %for.end ; skip the whole nest unless both bounds are non-zero
    496 
    497 for.preheader:
    498   br label %for.outer
    499 
    500 for.outer: ; outer loop header: performs two stores before the inner loop
    501   %i = phi i32 [ %add, %for.latch ], [ 0, %for.preheader ] ; outer induction variable, starts at 0
    502   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i ; &A[i], reused by the store in for.latch
    503   store i32 0, i32* %arrayidx, align 4, !tbaa !5 ; A[i] = 0
    504   %add = add nuw i32 %i, 1 ; i + 1: next induction value and index of the second store
    505   %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add
    506   store i32 2, i32* %arrayidx2, align 4, !tbaa !5 ; A[i+1] = 2 (the element the next outer iteration writes as A[i])
    507   br label %for.inner
    508 
    509 for.latch: ; outer loop latch; placed textually before for.inner in this function
    510   store i32 %add9, i32* %arrayidx, align 4, !tbaa !5 ; A[i] = sum, using %add9 directly without a phi in this block
    511   %exitcond30 = icmp eq i32 %add, %I
    512   br i1 %exitcond30, label %for.end, label %for.outer ; leave the outer loop once i+1 == I
    513 
    514 for.inner: ; single-block inner loop
    515   %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ] ; running sum, reset to 0 per outer iteration
    516   %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ] ; inner induction variable
    517   %arrayidx7 = getelementptr inbounds i32, i32* %B, i32 %j
    518   %l1 = load i32, i32* %arrayidx7, align 4, !tbaa !5 ; B[j]
    519   %add9 = add i32 %l1, %sum ; sum += B[j]
    520   %add10 = add nuw i32 %j, 1
    521   %exitcond = icmp eq i32 %add10, %J
    522   br i1 %exitcond, label %for.latch, label %for.inner ; leave the inner loop once j+1 == J
    523 
    524 for.end:
    525   ret void
    526 }
    527 
    528 
    529 ; CHECK-LABEL: test8
    530 ; Same as test7 with an extra outer loop nest
    531 ; CHECK: for.outest:
    532 ; CHECK:   br label %for.outer
    533 ; CHECK: for.outer:
    534 ; CHECK:   %i = phi i32 [ %add.3, %for.latch ], [ 0, %for.outest.new ]
    535 ; CHECK:   %niter = phi i32 [ %unroll_iter, %for.outest.new ], [ %niter.nsub.3, %for.latch ]
    536 ; CHECK:   br label %for.inner
    537 ; CHECK: for.inner:
    538 ; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ]
    539 ; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ]
    540 ; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add9.1, %for.inner ]
    541 ; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %add10.1, %for.inner ]
    542 ; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add9.2, %for.inner ]
    543 ; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %add10.2, %for.inner ]
    544 ; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add9.3, %for.inner ]
    545 ; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %add10.3, %for.inner ]
    546 ; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
    547 ; CHECK: for.latch:
    548 ; CHECK:   %add9.lcssa = phi i32 [ %add9, %for.inner ]
    549 ; CHECK:   %add9.lcssa.1 = phi i32 [ %add9.1, %for.inner ]
    550 ; CHECK:   %add9.lcssa.2 = phi i32 [ %add9.2, %for.inner ]
    551 ; CHECK:   %add9.lcssa.3 = phi i32 [ %add9.3, %for.inner ]
    552 ; CHECK:   br i1 %niter.ncmp.3, label %for.cleanup.unr-lcssa.loopexit, label %for.outer
    553 ; CHECK: for.cleanup.epilog-lcssa:
    554 ; CHECK:   br label %for.cleanup
    555 ; CHECK: for.cleanup:
    556 ; CHECK:   br i1 %exitcond41, label %for.end.loopexit, label %for.outest
    557 ; CHECK: for.end.loopexit:
    558 ; CHECK:   br label %for.end
    559 define void @test8(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { ; the test7 loop nest wrapped in one more enclosing loop (for.outest)
    560 entry:
    561   %cmp = icmp eq i32 %J, 0 ; inner bound %J is zero
    562   %cmp336 = icmp eq i32 %I, 0 ; bound %I is zero
    563   %or.cond = or i1 %cmp, %cmp336
    564   br i1 %or.cond, label %for.end, label %for.preheader ; skip everything if either bound is zero
    565 
    566 for.preheader:
    567   br label %for.outest
    568 
    569 for.outest: ; outermost loop header
    570   %x.038 = phi i32 [ %inc, %for.cleanup ], [ 0, %for.preheader ] ; outermost counter, starts at 0
    571   br label %for.outer
    572 
    573 for.outer: ; middle loop header: performs two stores before the inner loop
    574   %i = phi i32 [ %add, %for.latch ], [ 0, %for.outest ] ; middle induction variable, reset to 0 per outermost iteration
    575   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i ; &A[i], reused by the store in for.latch
    576   store i32 0, i32* %arrayidx, align 4, !tbaa !5 ; A[i] = 0
    577   %add = add nuw i32 %i, 1 ; i + 1: next induction value and index of the second store
    578   %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %add
    579   store i32 2, i32* %arrayidx6, align 4, !tbaa !5 ; A[i+1] = 2
    580   br label %for.inner
    581 
    582 for.inner: ; single-block inner loop
    583   %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ] ; running sum, reset to 0 per middle iteration
    584   %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ] ; inner induction variable
    585   %arrayidx11 = getelementptr inbounds i32, i32* %B, i32 %j
    586   %l1 = load i32, i32* %arrayidx11, align 4, !tbaa !5 ; B[j]
    587   %add9 = add i32 %l1, %sum ; sum += B[j]
    588   %add10 = add nuw i32 %j, 1
    589   %exitcond = icmp eq i32 %add10, %J
    590   br i1 %exitcond, label %for.latch, label %for.inner ; leave the inner loop once j+1 == J
    591 
    592 for.latch: ; middle loop latch
    593   store i32 %add9, i32* %arrayidx, align 4, !tbaa !5 ; A[i] = sum, using %add9 directly without a phi in this block
    594   %exitcond39 = icmp eq i32 %add, %I
    595   br i1 %exitcond39, label %for.cleanup, label %for.outer ; leave the middle loop once i+1 == I
    596 
    597 for.cleanup: ; outermost loop latch
    598   %inc = add nuw nsw i32 %x.038, 1
    599   %exitcond41 = icmp eq i32 %inc, 5 ; constant bound: the outermost loop runs 5 times
    600   br i1 %exitcond41, label %for.end, label %for.outest
    601 
    602 for.end:
    603   ret void
    604 }
    605 
    606 
    607 ; CHECK-LABEL: test9
    608 ; Same as test1 with tbaa, not noalias
    609 ; CHECK: for.outer:
    610 ; CHECK:   %i = phi i32 [ %add8.3, %for.latch ], [ 0, %for.outer.preheader.new ]
    611 ; CHECK:   %niter = phi i32 [ %unroll_iter, %for.outer.preheader.new ], [ %niter.nsub.3, %for.latch ]
    612 ; CHECK:   br label %for.inner
    613 ; CHECK: for.inner:
    614 ; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
    615 ; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
    616 ; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
    617 ; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add.1, %for.inner ]
    618 ; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
    619 ; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add.2, %for.inner ]
    620 ; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
    621 ; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add.3, %for.inner ]
    622 ; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
    623 ; CHECK: for.latch:
    624 ; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
    625 ; CHECK:   %add.lcssa.1 = phi i32 [ %add.1, %for.inner ]
    626 ; CHECK:   %add.lcssa.2 = phi i32 [ %add.2, %for.inner ]
    627 ; CHECK:   %add.lcssa.3 = phi i32 [ %add.3, %for.inner ]
    628 ; CHECK:   br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer
    629 ; CHECK: for.end.loopexit.unr-lcssa.loopexit:
    630 define void @test9(i32 %I, i32 %J, i32* nocapture %A, i16* nocapture readonly %B) #0 { ; for i in [0,%I): A[i] = sum over j in [0,%J) of sext(B[j]); %A and %B carry no noalias
    631 entry:
    632   %cmp = icmp ne i32 %J, 0 ; inner trip count %J is nonzero
    633   %cmpJ = icmp ne i32 %I, 0 ; outer trip count %I is nonzero (the name says J, but the operand is %I)
    634   %or.cond = and i1 %cmp, %cmpJ ; both loops run at least once
    635   br i1 %or.cond, label %for.outer.preheader, label %for.end ; otherwise skip the whole nest
    636 
    637 for.outer.preheader:
    638   br label %for.outer
    639 
    640 for.outer:
    641   %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ] ; outer induction variable, starts at 0
    642   br label %for.inner
    643 
    644 for.inner:
    645   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] ; inner induction variable, restarts at 0 for every %i
    646   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] ; running sum, reset to 0 for every %i
    647   %arrayidx = getelementptr inbounds i16, i16* %B, i32 %j ; &B[j]
    648   %0 = load i16, i16* %arrayidx, align 4, !tbaa !9 ; B[j], tagged as a "short" access; NOTE(review): align 4 on an i16 element indexed by %j looks overstated - confirm it is intentional
    649   %sext = sext i16 %0 to i32 ; widen to the accumulator type
    650   %add = add i32 %sext, %sum ; sum += B[j]
    651   %inc = add nuw i32 %j, 1
    652   %exitcond = icmp eq i32 %inc, %J ; inner loop ends when j+1 == %J
    653   br i1 %exitcond, label %for.latch, label %for.inner
    654 
    655 for.latch:
    656   %add.lcssa = phi i32 [ %add, %for.inner ] ; single-entry phi carrying the final sum out of the inner loop
    657   %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i ; &A[i]
    658   store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5 ; A[i] = sum, tagged as an "int" access (different TBAA type from the load of B)
    659   %add8 = add nuw i32 %i, 1
    660   %exitcond25 = icmp eq i32 %add8, %I ; outer loop ends when i+1 == %I
    661   br i1 %exitcond25, label %for.end.loopexit, label %for.outer
    662 
    663 for.end.loopexit:
    664   br label %for.end
    665 
    666 for.end:
    667   ret void
    668 }
    669 
    670 
    671 ; CHECK-LABEL: test10
    672 ; Be careful not to incorrectly update the exit phi nodes
    673 ; CHECK: %dec.lcssa.lcssa.ph.ph = phi i64 [ 0, %for.inc24 ]
    674 %struct.a = type { i64 } ; single i64 field; @g below is the destination of the final store
    675 @g = common global %struct.a zeroinitializer, align 8
    676 @c = common global [1 x i8] zeroinitializer, align 1 ; its only element is read once in the entry block
    677 define signext i16 @test10(i32 %k) #0 { ; loop nest (for.body outer, for.body2 inner) with branches inside the inner loop and phis in the exit block
    678 entry:
    679   %0 = load i8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @c, i64 0, i64 0), align 1 ; c[0]
    680   %tobool9 = icmp eq i8 %0, 0 ; true when c[0] == 0; computed once, outside both loops
    681   %tobool13 = icmp ne i32 %k, 0 ; true when %k != 0; computed once, outside both loops
    682   br label %for.body
    683 
    684 for.body:
    685   %storemerge82 = phi i64 [ 0, %entry ], [ %inc25, %for.inc24 ] ; outer counter, starts at 0
    686   br label %for.body2
    687 
    688 for.body2:
    689   %storemerge = phi i64 [ 4, %for.body ], [ %dec, %for.inc21 ] ; inner counter, starts at 4 and counts down
    690   br i1 %tobool9, label %for.body2.split, label %for.body2.split2
    691 
    692 for.body2.split2:
    693   br i1 %tobool13, label %for.inc21, label %for.inc21.if
    694 
    695 for.body2.split:
    696   br i1 %tobool13, label %for.inc21, label %for.inc21.then
    697 
    698 for.inc21.if:
    699   %storemerge.1 = phi i64 [ 0, %for.body2.split2 ] ; single-predecessor phi of a constant
    700   br label %for.inc21
    701 
    702 for.inc21.then:
    703   %storemerge.2 = phi i64 [ 0, %for.body2.split ] ; single-predecessor phi of a constant
    704   %storemerge.3 = phi i32 [ 0, %for.body2.split ] ; single-predecessor phi of a constant
    705   br label %for.inc21
    706 
    707 for.inc21:
    708   %storemerge.4 = phi i64 [ %storemerge.1, %for.inc21.if ], [ %storemerge.2, %for.inc21.then ], [ 4, %for.body2.split2 ], [ 4, %for.body2.split ] ; merges all four paths through the inner-loop body
    709   %storemerge.5 = phi i32 [ 0, %for.inc21.if ], [ %storemerge.3, %for.inc21.then ], [ 0, %for.body2.split2 ], [ 0, %for.body2.split ] ; merges all four paths through the inner-loop body
    710   %dec = add nsw i64 %storemerge, -1
    711   %tobool = icmp eq i64 %dec, 0 ; inner loop ends when the counter reaches 0
    712   br i1 %tobool, label %for.inc24, label %for.body2
    713 
    714 for.inc24:
    715   %storemerge.4.lcssa = phi i64 [ %storemerge.4, %for.inc21 ] ; value leaving the inner loop
    716   %storemerge.5.lcssa = phi i32 [ %storemerge.5, %for.inc21 ] ; value leaving the inner loop
    717   %inc25 = add nuw nsw i64 %storemerge82, 1
    718   %exitcond = icmp ne i64 %inc25, 5 ; keep looping until the outer counter reaches 5
    719   br i1 %exitcond, label %for.body, label %for.end26
    720 
    721 for.end26:
    722   %dec.lcssa.lcssa = phi i64 [ 0, %for.inc24 ] ; exit phi with a constant incoming value; the expected-output pattern above this function matches its counterpart after the transform
    723   %storemerge.4.lcssa.lcssa = phi i64 [ %storemerge.4.lcssa, %for.inc24 ] ; exit phi of a loop-defined value; not used by the store or the return below
    724   %storemerge.5.lcssa.lcssa = phi i32 [ %storemerge.5.lcssa, %for.inc24 ] ; exit phi of a loop-defined value; not used by the store or the return below
    725   store i64 %dec.lcssa.lcssa, i64* getelementptr inbounds (%struct.a, %struct.a* @g, i64 0, i32 0), align 8 ; writes the exit phi value to field 0 of @g
    726   ret i16 0
    727 }
    728 
    729 
    730 !5 = !{!6, !6, i64 0} ; TBAA access tag: access of type "int" at offset 0 (used by the i32 stores/loads above)
    731 !6 = !{!"int", !7, i64 0} ; type node "int", child of "omnipotent char"
    732 !7 = !{!"omnipotent char", !8, i64 0} ; type node "omnipotent char", child of the root
    733 !8 = !{!"Simple C/C++ TBAA"} ; root of the TBAA type tree
    734 !9 = !{!10, !10, i64 0} ; TBAA access tag: access of type "short" at offset 0 (used by the i16 load in test9)
    735 !10 = !{!"short", !7, i64 0} ; type node "short", child of "omnipotent char" (a sibling of "int")
    736