Home | History | Annotate | Download | only in LoopUnroll
      1 ; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
      2 ; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
      3 ;
      4 ; Run loop unrolling twice to verify that loop unrolling metadata is properly
      5 ; removed and further unrolling is disabled after the pass is run once.
      6 
      7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      8 target triple = "x86_64-unknown-linux-gnu"
      9 
     10 ; Control case: @loop4 has a constant trip count of 4 and no loop metadata,
     11 ; so the default unrolling heuristics should unroll it completely.  It is
     12 ; the baseline for the unroll(disable) pragma test @loop4_with_disable.
     13 ; The checks below require that no conditional branch remains in the function.
     14 ; CHECK-LABEL: @loop4(
     15 ; CHECK-NOT: br i1
     16 define void @loop4(i32* nocapture %a) {
     17 entry:
     18   br label %for.body
     19 
     20 for.body:                                         ; preds = %for.body, %entry
     21   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     22   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     23   %0 = load i32, i32* %arrayidx, align 4
     24   %inc = add nsw i32 %0, 1
     25   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
     26   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     27   %exitcond = icmp eq i64 %indvars.iv.next, 4 ; leave the loop after 4 iterations
     28   br i1 %exitcond, label %for.end, label %for.body ; latch branch, no loop metadata attached
     29 
     30 for.end:                                          ; preds = %for.body
     31   ret void
     32 }
     33 
     34 ; #pragma clang loop unroll(disable)
     35 ; Same 4-iteration loop as @loop4, but the latch carries llvm.loop.unroll.disable; the checks expect a single store before the conditional branch.
     36 ; CHECK-LABEL: @loop4_with_disable(
     37 ; CHECK: store i32
     38 ; CHECK-NOT: store i32
     39 ; CHECK: br i1
     40 define void @loop4_with_disable(i32* nocapture %a) {
     41 entry:
     42   br label %for.body
     43 
     44 for.body:                                         ; preds = %for.body, %entry
     45   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     46   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     47   %0 = load i32, i32* %arrayidx, align 4
     48   %inc = add nsw i32 %0, 1
     49   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
     50   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     51   %exitcond = icmp eq i64 %indvars.iv.next, 4 ; leave the loop after 4 iterations
     52   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 ; latch branch tagged with loop ID !1
     53 
     54 for.end:                                          ; preds = %for.body
     55   ret void
     56 }
     57 !1 = !{!1, !2} ; self-referential loop ID whose second operand is the hint !2
     58 !2 = !{!"llvm.loop.unroll.disable"} ; the unroll(disable) hint
     59 
     60 ; Control case: @loop64 has a constant trip count of 64, which is high
     61 ; enough that the default unrolling heuristic should *not* unroll it.  It
     62 ; is the baseline for the loop64_with_* pragma tests below.
     63 ; The checks below expect a single store before the conditional branch.
     64 ; CHECK-LABEL: @loop64(
     65 ; CHECK: store i32
     66 ; CHECK-NOT: store i32
     67 ; CHECK: br i1
     68 define void @loop64(i32* nocapture %a) {
     69 entry:
     70   br label %for.body
     71 
     72 for.body:                                         ; preds = %for.body, %entry
     73   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     74   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     75   %0 = load i32, i32* %arrayidx, align 4
     76   %inc = add nsw i32 %0, 1
     77   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
     78   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     79   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; leave the loop after 64 iterations
     80   br i1 %exitcond, label %for.end, label %for.body ; latch branch, no loop metadata attached
     81 
     82 for.end:                                          ; preds = %for.body
     83   ret void
     84 }
     85 
     86 ; #pragma clang loop unroll(full)
     87 ; The 64-iteration loop carries llvm.loop.unroll.full and should be fully unrolled.
     88 ; The checks below require that no conditional branch remains in the function.
     89 ; CHECK-LABEL: @loop64_with_full(
     90 ; CHECK-NOT: br i1
     91 define void @loop64_with_full(i32* nocapture %a) {
     92 entry:
     93   br label %for.body
     94 
     95 for.body:                                         ; preds = %for.body, %entry
     96   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     97   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     98   %0 = load i32, i32* %arrayidx, align 4
     99   %inc = add nsw i32 %0, 1
    100   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    101   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    102   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; leave the loop after 64 iterations
    103   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 ; latch branch tagged with loop ID !3
    104 
    105 for.end:                                          ; preds = %for.body
    106   ret void
    107 }
    108 !3 = !{!3, !4} ; self-referential loop ID whose second operand is the hint !4
    109 !4 = !{!"llvm.loop.unroll.full"} ; the unroll(full) hint, referenced by several loop IDs in this file
    110 
    111 ; #pragma clang loop unroll(full)
    112 ; Loop should be fully unrolled, even though the function is marked optsize.
    113 ; Same body and loop ID (!3) as @loop64_with_full; only the optsize attribute differs.
    114 ; CHECK-LABEL: @loop64_with_full_optsize(
    115 ; CHECK-NOT: br i1
    116 define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
    117 entry:
    118   br label %for.body
    119 
    120 for.body:                                         ; preds = %for.body, %entry
    121   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    122   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    123   %0 = load i32, i32* %arrayidx, align 4
    124   %inc = add nsw i32 %0, 1
    125   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    126   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    127   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; leave the loop after 64 iterations
    128   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 ; latch branch tagged with loop ID !3
    129 
    130 for.end:                                          ; preds = %for.body
    131   ret void
    132 }
    133 
    134 ; #pragma clang loop unroll_count(4)
    135 ; The 64-iteration loop should be unrolled by a factor of 4.
    136 ; The checks below expect exactly four stores before the conditional branch.
    137 ; CHECK-LABEL: @loop64_with_count4(
    138 ; CHECK: store i32
    139 ; CHECK: store i32
    140 ; CHECK: store i32
    141 ; CHECK: store i32
    142 ; CHECK-NOT: store i32
    143 ; CHECK: br i1
    144 define void @loop64_with_count4(i32* nocapture %a) {
    145 entry:
    146   br label %for.body
    147 
    148 for.body:                                         ; preds = %for.body, %entry
    149   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    150   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    151   %0 = load i32, i32* %arrayidx, align 4
    152   %inc = add nsw i32 %0, 1
    153   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    154   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    155   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; leave the loop after 64 iterations
    156   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 ; latch branch tagged with loop ID !5
    157 
    158 for.end:                                          ; preds = %for.body
    159   ret void
    160 }
    161 !5 = !{!5, !6} ; self-referential loop ID whose second operand is the hint !6
    162 !6 = !{!"llvm.loop.unroll.count", i32 4} ; the unroll_count(4) hint
    163 
    164 ; #pragma clang loop unroll(full)
    165 ; Full unrolling is requested, but the loop's trip count is the runtime
    166 ; value %b, so no unrolling should occur.
    167 ; The checks below expect exactly one store in the function.
    168 ; CHECK-LABEL: @runtime_loop_with_full(
    169 ; CHECK: store i32
    170 ; CHECK-NOT: store i32
    171 define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
    172 entry:
    173   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped when %b <= 0
    174   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 ; NOTE(review): loop ID on a non-latch branch - presumably has no effect; confirm
    175 
    176 for.body:                                         ; preds = %entry, %for.body
    177   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    178   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    179   %0 = load i32, i32* %arrayidx, align 4
    180   %inc = add nsw i32 %0, 1
    181   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    182   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    183   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter so it can be compared with the i32 bound %b
    184   %exitcond = icmp eq i32 %lftr.wideiv, %b ; leave the loop once the counter reaches %b
    185   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8 ; latch branch tagged with loop ID !8
    186 
    187 for.end:                                          ; preds = %for.body, %entry
    188   ret void
    189 }
    190 !8 = !{!8, !4} ; self-referential loop ID whose second operand is the unroll(full) hint !4
    191 
    192 ; #pragma clang loop unroll_count(4)
    193 ; Loop has a runtime trip count (%b).  Runtime unrolling should occur and the
    194 ; loop should be duplicated (4x unrolled body plus the original as remainder).
    195 ; The checks below expect four stores in the unrolled body, then one store in the block for.body.epil.
    196 ; CHECK-LABEL: @runtime_loop_with_count4(
    197 ; CHECK: for.body
    198 ; CHECK: store
    199 ; CHECK: store
    200 ; CHECK: store
    201 ; CHECK: store
    202 ; CHECK-NOT: store
    203 ; CHECK: br i1
    204 ; CHECK: for.body.epil:
    205 ; CHECK: store
    206 ; CHECK-NOT: store
    207 ; CHECK: br i1
    208 define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
    209 entry:
    210   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped when %b <= 0
    211   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9 ; NOTE(review): loop ID on a non-latch branch - presumably has no effect; confirm
    212 
    213 for.body:                                         ; preds = %entry, %for.body
    214   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    215   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    216   %0 = load i32, i32* %arrayidx, align 4
    217   %inc = add nsw i32 %0, 1
    218   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    219   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    220   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter so it can be compared with the i32 bound %b
    221   %exitcond = icmp eq i32 %lftr.wideiv, %b ; leave the loop once the counter reaches %b
    222   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9 ; latch branch tagged with loop ID !9
    223 
    224 for.end:                                          ; preds = %for.body, %entry
    225   ret void
    226 }
    227 !9 = !{!9, !6} ; self-referential loop ID whose second operand is the unroll_count(4) hint !6
    228 
    229 ; #pragma clang loop unroll_count(1)
    230 ; Loop should not be unrolled, although it has the same 4-iteration body that @loop4 expects to be fully unrolled by default.
    231 ; The checks below expect a single store before the conditional branch.  The %b argument is not referenced by the body.
    232 ; CHECK-LABEL: @unroll_1(
    233 ; CHECK: store i32
    234 ; CHECK-NOT: store i32
    235 ; CHECK: br i1
    236 define void @unroll_1(i32* nocapture %a, i32 %b) {
    237 entry:
    238   br label %for.body
    239 
    240 for.body:                                         ; preds = %for.body, %entry
    241   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    242   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    243   %0 = load i32, i32* %arrayidx, align 4
    244   %inc = add nsw i32 %0, 1
    245   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    246   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    247   %exitcond = icmp eq i64 %indvars.iv.next, 4 ; leave the loop after 4 iterations
    248   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10 ; latch branch tagged with loop ID !10
    249 
    250 for.end:                                          ; preds = %for.body
    251   ret void
    252 }
    253 !10 = !{!10, !11} ; self-referential loop ID whose second operand is the hint !11
    254 !11 = !{!"llvm.loop.unroll.count", i32 1} ; the unroll_count(1) hint
    255 
    256 ; #pragma clang loop unroll(full)
    257 ; Loop has a very high trip count (1 million) and full unrolling was requested.
    258 ; Loop should be unrolled up to the pragma threshold (-pragma-unroll-threshold=1024 on the opt command lines), but not completely.
    259 ; The checks below only require at least two stores followed by a conditional branch.
    260 ; CHECK-LABEL: @unroll_1M(
    261 ; CHECK: store i32
    262 ; CHECK: store i32
    263 ; CHECK: br i1
    264 define void @unroll_1M(i32* nocapture %a, i32 %b) {
    265 entry:
    266   br label %for.body
    267 
    268 for.body:                                         ; preds = %for.body, %entry
    269   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    270   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    271   %0 = load i32, i32* %arrayidx, align 4
    272   %inc = add nsw i32 %0, 1
    273   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    274   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    275   %exitcond = icmp eq i64 %indvars.iv.next, 1000000 ; leave the loop after 1,000,000 iterations
    276   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12 ; latch branch tagged with loop ID !12
    277 
    278 for.end:                                          ; preds = %for.body
    279   ret void
    280 }
    281 !12 = !{!12, !4} ; self-referential loop ID whose second operand is the unroll(full) hint !4
    282 
    283 ; #pragma clang loop unroll(enable)
    284 ; The 64-iteration loop carries llvm.loop.unroll.enable and should be fully unrolled.
    285 ; The checks below require that no conditional branch remains in the function.
    286 ; CHECK-LABEL: @loop64_with_enable(
    287 ; CHECK-NOT: br i1
    288 define void @loop64_with_enable(i32* nocapture %a) {
    289 entry:
    290   br label %for.body
    291 
    292 for.body:                                         ; preds = %for.body, %entry
    293   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    294   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    295   %0 = load i32, i32* %arrayidx, align 4
    296   %inc = add nsw i32 %0, 1
    297   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    298   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    299   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; leave the loop after 64 iterations
    300   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13 ; latch branch tagged with loop ID !13
    301 
    302 for.end:                                          ; preds = %for.body
    303   ret void
    304 }
    305 !13 = !{!13, !14} ; self-referential loop ID whose second operand is the hint !14
    306 !14 = !{!"llvm.loop.unroll.enable"} ; the unroll(enable) hint
    307 
    308 ; #pragma clang loop unroll(enable)
    309 ; Loop has a runtime trip count (%b) and should be runtime unrolled and
    310 ; duplicated (8x unrolled body plus the original as remainder).
    311 ; The checks below expect eight stores in the unrolled body, then one store in the block for.body.epil.
    312 ; CHECK-LABEL: @runtime_loop_with_enable(
    313 ; CHECK: for.body:
    314 ; CHECK: store i32
    315 ; CHECK: store i32
    316 ; CHECK: store i32
    317 ; CHECK: store i32
    318 ; CHECK: store i32
    319 ; CHECK: store i32
    320 ; CHECK: store i32
    321 ; CHECK: store i32
    322 ; CHECK-NOT: store i32
    323 ; CHECK: br i1
    324 ; CHECK: for.body.epil:
    325 ; CHECK: store
    326 ; CHECK-NOT: store
    327 ; CHECK: br i1
    328 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
    329 entry:
    330   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped when %b <= 0
    331   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15 ; guard branch uses the same loop ID as the latch, as in the other runtime tests
    332 
    333 for.body:                                         ; preds = %entry, %for.body
    334   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    335   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    336   %0 = load i32, i32* %arrayidx, align 4
    337   %inc = add nsw i32 %0, 1
    338   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    339   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    340   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter so it can be compared with the i32 bound %b
    341   %exitcond = icmp eq i32 %lftr.wideiv, %b ; leave the loop once the counter reaches %b
    342   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 ; latch branch tagged with loop ID !15
    343 
    344 for.end:                                          ; preds = %for.body, %entry
    345   ret void
    346 }
    347 !15 = !{!15, !14} ; self-referential loop ID whose second operand is the unroll(enable) hint !14
    348 
    349 ; #pragma clang loop unroll_count(3)
    350 ; Loop has a runtime trip count (%b).  Runtime unrolling should occur and the
    351 ; loop should be duplicated (3x unrolled body plus the original as remainder).
    352 ; The checks below expect three stores in the unrolled body, then one store in the block for.body.epil.
    353 ; CHECK-LABEL: @runtime_loop_with_count3(
    354 ; CHECK: for.body
    355 ; CHECK: store
    356 ; CHECK: store
    357 ; CHECK: store
    358 ; CHECK-NOT: store
    359 ; CHECK: br i1
    360 ; CHECK: for.body.epil:
    361 ; CHECK: store
    362 ; CHECK-NOT: store
    363 ; CHECK: br i1
    364 define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
    365 entry:
    366   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped when %b <= 0
    367   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16 ; NOTE(review): loop ID on a non-latch branch - presumably has no effect; confirm
    368 
    369 for.body:                                         ; preds = %entry, %for.body
    370   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    371   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
    372   %0 = load i32, i32* %arrayidx, align 4
    373   %inc = add nsw i32 %0, 1
    374   store i32 %inc, i32* %arrayidx, align 4 ; a[i] = a[i] + 1
    375   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    376   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter so it can be compared with the i32 bound %b
    377   %exitcond = icmp eq i32 %lftr.wideiv, %b ; leave the loop once the counter reaches %b
    378   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16 ; latch branch tagged with loop ID !16
    379 
    380 for.end:                                          ; preds = %for.body, %entry
    381   ret void
    382 }
    383 !16 = !{!16, !17} ; self-referential loop ID whose second operand is the hint !17
    384 !17 = !{!"llvm.loop.unroll.count", i32 3} ; the unroll_count(3) hint
    385