Home | History | Annotate | Download | only in LoopUnrollAndJam
      1 ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime < %s -S | FileCheck %s
      2 ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-and-jam-threshold=15 < %s -S | FileCheck %s --check-prefix=CHECK-LOWTHRES
        ; The first invocation runs the unroll-and-jam pass with -allow-unroll-and-jam
        ; and -unroll-runtime and verifies the output against the default CHECK prefix.
        ; The second invocation additionally passes -unroll-and-jam-threshold=15 and
        ; verifies the output against the CHECK-LOWTHRES prefix instead.
      3 
        ; Little-endian layout with 32-bit pointers and a 32-bit native integer width.
      4 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
      5 
      6 ; CHECK-LABEL: test1
      7 ; Basic check that these loops are by default UnJ'd
        ; Loop nest under test (no loop metadata attached):
        ;   for i in [0, I): sum = 0; for j in [0, J): sum += B[j]; A[i] = sum
        ; The entry block only enters the nest when both I and J are non-zero.
        ; Default invocation: the expected %i.us phi takes its back-edge value from a
        ; numbered clone (%add8.us.<N>) and is entered from %for.outer.preheader.new,
        ; i.e. the outer loop has been rewritten.
        ; Low-threshold invocation (-unroll-and-jam-threshold=15): the expected phi is
        ; identical to the input one, i.e. the loop is left alone.
        ; The clone suffix is matched with [0-9]+ so that it has to be present and may
        ; contain any decimal digit (the previous [1-9]* accepted an empty suffix and
        ; rejected suffixes containing 0, such as .10).
      8 define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
      9 ; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
     10 ; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
     11 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
     12   %cmp = icmp ne i32 %J, 0
     13   %cmp122 = icmp ne i32 %I, 0
     14   %or.cond = and i1 %cmp, %cmp122
     15   br i1 %or.cond, label %for.outer.preheader, label %for.end
     16 
     17 for.outer.preheader:
     18   br label %for.outer
     19 
     20 for.outer:
          ; Outer induction variable i, starting at 0.
     21   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
     22   br label %for.inner
     23 
     24 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
     25   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
     26   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
     27   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
     28   %0 = load i32, i32* %arrayidx.us, align 4
     29   %add.us = add i32 %0, %sum1.us
     30   %inc.us = add nuw i32 %j.us, 1
     31   %exitcond = icmp eq i32 %inc.us, %J
     32   br i1 %exitcond, label %for.latch, label %for.inner
     33 
     34 for.latch:
          ; Single-input phi carrying the inner loop's final sum; it is stored to A[i]
          ; before i is incremented and compared against I.
     35   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
     36   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
     37   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
     38   %add8.us = add nuw i32 %i.us, 1
     39   %exitcond25 = icmp eq i32 %add8.us, %I
     40   br i1 %exitcond25, label %for.end.loopexit, label %for.outer
     41 
     42 for.end.loopexit:
     43   br label %for.end
     44 
     45 for.end:
     46   ret void
     47 }
     48 
     49 
     50 ; CHECK-LABEL: nounroll_and_jam
     51 ; #pragma nounroll_and_jam
        ; Same guarded loop nest as the other functions in this file: for each i in
        ; [0, I) the inner loop sums B[0..J) and the latch stores the sum to A[i].
        ; The outer latch branch carries !llvm.loop !1, whose only property is
        ; "llvm.loop.unroll_and_jam.disable". The expected %i.us phi is identical to
        ; the input one, i.e. the outer loop must be left untouched.
     52 define void @nounroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
     53 ; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
     54 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
     55   %cmp = icmp ne i32 %J, 0
     56   %cmp122 = icmp ne i32 %I, 0
     57   %or.cond = and i1 %cmp, %cmp122
     58   br i1 %or.cond, label %for.outer.preheader, label %for.end
     59 
     60 for.outer.preheader:
     61   br label %for.outer
     62 
     63 for.outer:
          ; Outer induction variable i, starting at 0.
     64   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
     65   br label %for.inner
     66 
     67 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
     68   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
     69   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
     70   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
     71   %0 = load i32, i32* %arrayidx.us, align 4
     72   %add.us = add i32 %0, %sum1.us
     73   %inc.us = add nuw i32 %j.us, 1
     74   %exitcond = icmp eq i32 %inc.us, %J
     75   br i1 %exitcond, label %for.latch, label %for.inner
     76 
     77 for.latch:
          ; Store the finished sum to A[i], then advance i; the back-edge branch is
          ; where the loop metadata (!1) is attached.
     78   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
     79   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
     80   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
     81   %add8.us = add nuw i32 %i.us, 1
     82   %exitcond25 = icmp eq i32 %add8.us, %I
     83   br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !1
     84 
     85 for.end.loopexit:
     86   br label %for.end
     87 
     88 for.end:
     89   ret void
     90 }
     91 
     92 
     93 ; CHECK-LABEL: unroll_and_jam_count
     94 ; #pragma unroll_and_jam(8)
        ; Same guarded loop nest as the other functions in this file: for each i in
        ; [0, I) the inner loop sums B[0..J) and the latch stores the sum to A[i].
        ; The outer latch branch carries !llvm.loop !3, whose only property is
        ; "llvm.loop.unroll_and_jam.count" with value 8. The expected %i.us phi takes
        ; its back-edge value from %add8.us.7 and is entered from
        ; %for.outer.preheader.new; the .7 suffix is presumably the last of the clones
        ; produced for a count of 8 (TODO confirm against the pass's naming scheme).
     95 define void @unroll_and_jam_count(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
     96 ; CHECK: %i.us = phi i32 [ %add8.us.7, %for.latch ], [ 0, %for.outer.preheader.new ]
     97 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
     98   %cmp = icmp ne i32 %J, 0
     99   %cmp122 = icmp ne i32 %I, 0
    100   %or.cond = and i1 %cmp, %cmp122
    101   br i1 %or.cond, label %for.outer.preheader, label %for.end
    102 
    103 for.outer.preheader:
    104   br label %for.outer
    105 
    106 for.outer:
          ; Outer induction variable i, starting at 0.
    107   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    108   br label %for.inner
    109 
    110 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
    111   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
    112   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
    113   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
    114   %0 = load i32, i32* %arrayidx.us, align 4
    115   %add.us = add i32 %0, %sum1.us
    116   %inc.us = add nuw i32 %j.us, 1
    117   %exitcond = icmp eq i32 %inc.us, %J
    118   br i1 %exitcond, label %for.latch, label %for.inner
    119 
    120 for.latch:
          ; Store the finished sum to A[i], then advance i; the back-edge branch is
          ; where the loop metadata (!3) is attached.
    121   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
    122   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
    123   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
    124   %add8.us = add nuw i32 %i.us, 1
    125   %exitcond25 = icmp eq i32 %add8.us, %I
    126   br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !3
    127 
    128 for.end.loopexit:
    129   br label %for.end
    130 
    131 for.end:
    132   ret void
    133 }
    134 
    135 
    136 ; CHECK-LABEL: unroll_and_jam
    137 ; #pragma unroll_and_jam
        ; Same guarded loop nest as the other functions in this file: for each i in
        ; [0, I) the inner loop sums B[0..J) and the latch stores the sum to A[i].
        ; The outer latch branch carries !llvm.loop !5, whose only property is
        ; "llvm.loop.unroll_and_jam.enable". Both the default and the low-threshold
        ; invocation expect the rewritten form of the %i.us phi (back-edge value from a
        ; numbered clone, entry from %for.outer.preheader.new).
        ; The clone suffix is matched with [0-9]+ so that it has to be present and may
        ; contain any decimal digit (the previous [1-9]* accepted an empty suffix and
        ; rejected suffixes containing 0, such as .10).
    138 define void @unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
    139 ; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
    140 ; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
    141 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
    142   %cmp = icmp ne i32 %J, 0
    143   %cmp122 = icmp ne i32 %I, 0
    144   %or.cond = and i1 %cmp, %cmp122
    145   br i1 %or.cond, label %for.outer.preheader, label %for.end
    146 
    147 for.outer.preheader:
    148   br label %for.outer
    149 
    150 for.outer:
          ; Outer induction variable i, starting at 0.
    151   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    152   br label %for.inner
    153 
    154 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
    155   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
    156   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
    157   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
    158   %0 = load i32, i32* %arrayidx.us, align 4
    159   %add.us = add i32 %0, %sum1.us
    160   %inc.us = add nuw i32 %j.us, 1
    161   %exitcond = icmp eq i32 %inc.us, %J
    162   br i1 %exitcond, label %for.latch, label %for.inner
    163 
    164 for.latch:
          ; Store the finished sum to A[i], then advance i; the back-edge branch is
          ; where the loop metadata (!5) is attached.
    165   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
    166   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
    167   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
    168   %add8.us = add nuw i32 %i.us, 1
    169   %exitcond25 = icmp eq i32 %add8.us, %I
    170   br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !5
    171 
    172 for.end.loopexit:
    173   br label %for.end
    174 
    175 for.end:
    176   ret void
    177 }
    178 
    179 
    180 ; CHECK-LABEL: nounroll
    181 ; #pragma nounroll (which we take to mean disable unroll and jam too)
        ; Same guarded loop nest as the other functions in this file: for each i in
        ; [0, I) the inner loop sums B[0..J) and the latch stores the sum to A[i].
        ; The outer latch branch carries !llvm.loop !7, whose only property is
        ; "llvm.loop.unroll.disable". The expected %i.us phi is identical to the input
        ; one, i.e. the outer loop must be left untouched.
    182 define void @nounroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
    183 ; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    184 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
    185   %cmp = icmp ne i32 %J, 0
    186   %cmp122 = icmp ne i32 %I, 0
    187   %or.cond = and i1 %cmp, %cmp122
    188   br i1 %or.cond, label %for.outer.preheader, label %for.end
    189 
    190 for.outer.preheader:
    191   br label %for.outer
    192 
    193 for.outer:
          ; Outer induction variable i, starting at 0.
    194   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    195   br label %for.inner
    196 
    197 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
    198   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
    199   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
    200   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
    201   %0 = load i32, i32* %arrayidx.us, align 4
    202   %add.us = add i32 %0, %sum1.us
    203   %inc.us = add nuw i32 %j.us, 1
    204   %exitcond = icmp eq i32 %inc.us, %J
    205   br i1 %exitcond, label %for.latch, label %for.inner
    206 
    207 for.latch:
          ; Store the finished sum to A[i], then advance i; the back-edge branch is
          ; where the loop metadata (!7) is attached.
    208   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
    209   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
    210   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
    211   %add8.us = add nuw i32 %i.us, 1
    212   %exitcond25 = icmp eq i32 %add8.us, %I
    213   br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !7
    214 
    215 for.end.loopexit:
    216   br label %for.end
    217 
    218 for.end:
    219   ret void
    220 }
    221 
    222 
    223 ; CHECK-LABEL: unroll
    224 ; #pragma unroll (which we take to mean disable unroll and jam)
        ; Same guarded loop nest as the other functions in this file: for each i in
        ; [0, I) the inner loop sums B[0..J) and the latch stores the sum to A[i].
        ; The outer latch branch carries !llvm.loop !9, whose only property is
        ; "llvm.loop.unroll.enable". The expected %i.us phi is identical to the input
        ; one, i.e. this pass must leave the outer loop untouched.
    225 define void @unroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
    226 ; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    227 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
    228   %cmp = icmp ne i32 %J, 0
    229   %cmp122 = icmp ne i32 %I, 0
    230   %or.cond = and i1 %cmp, %cmp122
    231   br i1 %or.cond, label %for.outer.preheader, label %for.end
    232 
    233 for.outer.preheader:
    234   br label %for.outer
    235 
    236 for.outer:
          ; Outer induction variable i, starting at 0.
    237   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    238   br label %for.inner
    239 
    240 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
    241   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
    242   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
    243   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
    244   %0 = load i32, i32* %arrayidx.us, align 4
    245   %add.us = add i32 %0, %sum1.us
    246   %inc.us = add nuw i32 %j.us, 1
    247   %exitcond = icmp eq i32 %inc.us, %J
    248   br i1 %exitcond, label %for.latch, label %for.inner
    249 
    250 for.latch:
          ; Store the finished sum to A[i], then advance i; the back-edge branch is
          ; where the loop metadata (!9) is attached.
    251   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
    252   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
    253   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
    254   %add8.us = add nuw i32 %i.us, 1
    255   %exitcond25 = icmp eq i32 %add8.us, %I
    256   br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !9
    257 
    258 for.end.loopexit:
    259   br label %for.end
    260 
    261 for.end:
    262   ret void
    263 }
    264 
    265 
    266 ; CHECK-LABEL: nounroll_plus_unroll_and_jam
    267 ; #pragma clang loop nounroll, unroll_and_jam (which we take to mean do unroll_and_jam)
        ; Same guarded loop nest as the other functions in this file: for each i in
        ; [0, I) the inner loop sums B[0..J) and the latch stores the sum to A[i].
        ; The outer latch branch carries !llvm.loop !11, which combines
        ; "llvm.loop.unroll.disable" (!8) with "llvm.loop.unroll_and_jam.enable" (!6).
        ; The expected %i.us phi is the rewritten form (back-edge value from a numbered
        ; clone, entry from %for.outer.preheader.new).
        ; The clone suffix is matched with [0-9]+ so that it has to be present and may
        ; contain any decimal digit (the previous [1-9]* accepted an empty suffix and
        ; rejected suffixes containing 0, such as .10).
    268 define void @nounroll_plus_unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
    269 ; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
    270 entry:
          ; Guard: skip the whole nest unless J != 0 and I != 0.
    271   %cmp = icmp ne i32 %J, 0
    272   %cmp122 = icmp ne i32 %I, 0
    273   %or.cond = and i1 %cmp, %cmp122
    274   br i1 %or.cond, label %for.outer.preheader, label %for.end
    275 
    276 for.outer.preheader:
    277   br label %for.outer
    278 
    279 for.outer:
          ; Outer induction variable i, starting at 0.
    280   %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
    281   br label %for.inner
    282 
    283 for.inner:
          ; Inner loop: %sum1.us accumulates B[j] while j counts from 0 up to J.
    284   %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
    285   %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
    286   %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
    287   %0 = load i32, i32* %arrayidx.us, align 4
    288   %add.us = add i32 %0, %sum1.us
    289   %inc.us = add nuw i32 %j.us, 1
    290   %exitcond = icmp eq i32 %inc.us, %J
    291   br i1 %exitcond, label %for.latch, label %for.inner
    292 
    293 for.latch:
          ; Store the finished sum to A[i], then advance i; the back-edge branch is
          ; where the loop metadata (!11) is attached.
    294   %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
    295   %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
    296   store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
    297   %add8.us = add nuw i32 %i.us, 1
    298   %exitcond25 = icmp eq i32 %add8.us, %I
    299   br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !11
    300 
    301 for.end.loopexit:
    302   br label %for.end
    303 
    304 for.end:
    305   ret void
    306 }
    307 
    308 
    309 !1 = distinct !{!1, !2} ; loop ID attached in @nounroll_and_jam
    310 !2 = distinct !{!"llvm.loop.unroll_and_jam.disable"}
    311 !3 = distinct !{!3, !4} ; loop ID attached in @unroll_and_jam_count
    312 !4 = distinct !{!"llvm.loop.unroll_and_jam.count", i32 8}
    313 !5 = distinct !{!5, !6} ; loop ID attached in @unroll_and_jam
    314 !6 = distinct !{!"llvm.loop.unroll_and_jam.enable"}
    315 !7 = distinct !{!7, !8} ; loop ID attached in @nounroll
    316 !8 = distinct !{!"llvm.loop.unroll.disable"}
    317 !9 = distinct !{!9, !10} ; loop ID attached in @unroll
    318 !10 = distinct !{!"llvm.loop.unroll.enable"}
    319 !11 = distinct !{!11, !8, !6} ; loop ID attached in @nounroll_plus_unroll_and_jam: unroll.disable plus unroll_and_jam.enable