Home | History | Annotate | Download | only in LoopUnroll
      1 ; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
      2 ; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
      3 ;
      4 ; Run loop unrolling twice to verify that loop unrolling metadata is properly
      5 ; removed and further unrolling is disabled after the pass is run once.
      6 
      7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      8 target triple = "x86_64-unknown-linux-gnu"
      9 
     10 ; loop4 contains a small loop which should be completely unrolled by
     11 ; the default unrolling heuristics.  It serves as a control for the
     12 ; unroll(disable) pragma test loop4_with_disable.
     13 ;
     14 ; CHECK-LABEL: @loop4(
     15 ; CHECK-NOT: br i1
     16 define void @loop4(i32* nocapture %a) { ; control case: adds 1 to a[0..3] in place, no unroll metadata
     17 entry:
     18   br label %for.body
     19 
     20 for.body:                                         ; preds = %for.body, %entry
     21   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
     22   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
     23   %0 = load i32, i32* %arrayidx, align 4
     24   %inc = add nsw i32 %0, 1
     25   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
     26   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     27   %exitcond = icmp eq i64 %indvars.iv.next, 4 ; constant bound: body executes for index 0..3
     28   br i1 %exitcond, label %for.end, label %for.body ; back edge; no !llvm.loop attachment
     29 
     30 for.end:                                          ; preds = %for.body
     31   ret void
     32 }
     33 
     34 ; #pragma clang loop unroll(disable)
     35 ;
     36 ; CHECK-LABEL: @loop4_with_disable(
     37 ; CHECK: store i32
     38 ; CHECK-NOT: store i32
     39 ; CHECK: br i1
     40 define void @loop4_with_disable(i32* nocapture %a) { ; same body as @loop4, but the back edge carries unroll.disable metadata
     41 entry:
     42   br label %for.body
     43 
     44 for.body:                                         ; preds = %for.body, %entry
     45   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
     46   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
     47   %0 = load i32, i32* %arrayidx, align 4
     48   %inc = add nsw i32 %0, 1
     49   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
     50   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     51   %exitcond = icmp eq i64 %indvars.iv.next, 4 ; constant bound: body executes for index 0..3
     52   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 ; back edge tagged with loop node !1
     53 
     54 for.end:                                          ; preds = %for.body
     55   ret void
     56 }
     57 !1 = !{!1, !2} ; loop node: first operand refers to itself, second operand is the hint !2
     58 !2 = !{!"llvm.loop.unroll.disable"} ; hint corresponding to unroll(disable)
     59 
     60 ; loop64 has a high enough count that it should *not* be unrolled by
     61 ; the default unrolling heuristic.  It serves as the control for the
     62 ; loop64_with_.* pragma tests below.
     63 ;
     64 ; CHECK-LABEL: @loop64(
     65 ; CHECK: store i32
     66 ; CHECK-NOT: store i32
     67 ; CHECK: br i1
     68 define void @loop64(i32* nocapture %a) { ; control case: adds 1 to a[0..63] in place, no unroll metadata
     69 entry:
     70   br label %for.body
     71 
     72 for.body:                                         ; preds = %for.body, %entry
     73   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
     74   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
     75   %0 = load i32, i32* %arrayidx, align 4
     76   %inc = add nsw i32 %0, 1
     77   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
     78   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
     79   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; constant bound: body executes for index 0..63
     80   br i1 %exitcond, label %for.end, label %for.body ; back edge; no !llvm.loop attachment
     81 
     82 for.end:                                          ; preds = %for.body
     83   ret void
     84 }
     85 
     86 ; #pragma clang loop unroll(full)
     87 ; Loop should be fully unrolled.
     88 ;
     89 ; CHECK-LABEL: @loop64_with_full(
     90 ; CHECK-NOT: br i1
     91 define void @loop64_with_full(i32* nocapture %a) { ; same body as @loop64, but the back edge carries unroll.full metadata
     92 entry:
     93   br label %for.body
     94 
     95 for.body:                                         ; preds = %for.body, %entry
     96   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
     97   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
     98   %0 = load i32, i32* %arrayidx, align 4
     99   %inc = add nsw i32 %0, 1
    100   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    101   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    102   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; constant bound: body executes for index 0..63
    103   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 ; back edge tagged with loop node !3
    104 
    105 for.end:                                          ; preds = %for.body
    106   ret void
    107 }
    108 !3 = !{!3, !4} ; loop node: first operand refers to itself, second operand is the hint !4
    109 !4 = !{!"llvm.loop.unroll.full"} ; hint corresponding to unroll(full); also referenced by !8 and !12 later in this file
    110 
    111 ; #pragma clang loop unroll_count(4)
    112 ; Loop should be unrolled 4 times.
    113 ;
    114 ; CHECK-LABEL: @loop64_with_count4(
    115 ; CHECK: store i32
    116 ; CHECK: store i32
    117 ; CHECK: store i32
    118 ; CHECK: store i32
    119 ; CHECK-NOT: store i32
    120 ; CHECK: br i1
    121 define void @loop64_with_count4(i32* nocapture %a) { ; same body as @loop64, but the back edge carries an unroll.count of 4
    122 entry:
    123   br label %for.body
    124 
    125 for.body:                                         ; preds = %for.body, %entry
    126   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
    127   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    128   %0 = load i32, i32* %arrayidx, align 4
    129   %inc = add nsw i32 %0, 1
    130   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    131   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    132   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; constant bound: body executes for index 0..63
    133   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 ; back edge tagged with loop node !5
    134 
    135 for.end:                                          ; preds = %for.body
    136   ret void
    137 }
    138 !5 = !{!5, !6} ; loop node: first operand refers to itself, second operand is the hint !6
    139 !6 = !{!"llvm.loop.unroll.count", i32 4} ; hint corresponding to unroll_count(4); also referenced by !9 later in this file
    140 
    141 ; #pragma clang loop unroll(full)
    142 ; Full unrolling is requested, but loop has a runtime trip count so
    143 ; no unrolling should occur.
    144 ;
    145 ; CHECK-LABEL: @runtime_loop_with_full(
    146 ; CHECK: store i32
    147 ; CHECK-NOT: store i32
    148 define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) { ; unroll.full metadata on a loop whose bound is the runtime argument %b
    149 entry:
    150   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped entirely unless %b > 0
    151   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 ; NOTE(review): same node as the back edge, but this branch sits in entry, outside the loop - confirm the attachment is intended
    152 
    153 for.body:                                         ; preds = %entry, %for.body
    154   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; index: 0 on entry, previous index + 1 on the back edge
    155   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    156   %0 = load i32, i32* %arrayidx, align 4
    157   %inc = add nsw i32 %0, 1
    158   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    159   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    160   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the 64-bit index so it can be compared with the i32 bound
    161   %exitcond = icmp eq i32 %lftr.wideiv, %b ; exit once the incremented index equals %b
    162   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8 ; back edge tagged with loop node !8
    163 
    164 for.end:                                          ; preds = %for.body, %entry
    165   ret void
    166 }
    167 !8 = !{!8, !4} ; loop node: first operand refers to itself, second operand is !4 ("llvm.loop.unroll.full", defined earlier in this file)
    168 
    169 ; #pragma clang loop unroll_count(4)
    170 ; Loop has a runtime trip count.  Runtime unrolling should occur and loop
    171 ; should be duplicated (original and 4x unrolled).
    172 ;
    173 ; CHECK-LABEL: @runtime_loop_with_count4(
    174 ; CHECK: for.body.prol:
    175 ; CHECK: store
    176 ; CHECK-NOT: store
    177 ; CHECK: br i1
    178 ; CHECK: for.body
    179 ; CHECK: store
    180 ; CHECK: store
    181 ; CHECK: store
    182 ; CHECK: store
    183 ; CHECK-NOT: store
    184 ; CHECK: br i1
    185 define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) { ; unroll.count 4 metadata on a loop whose bound is the runtime argument %b
    186 entry:
    187   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped entirely unless %b > 0
    188   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9 ; NOTE(review): same node as the back edge, but this branch sits in entry, outside the loop - confirm the attachment is intended
    189 
    190 for.body:                                         ; preds = %entry, %for.body
    191   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; index: 0 on entry, previous index + 1 on the back edge
    192   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    193   %0 = load i32, i32* %arrayidx, align 4
    194   %inc = add nsw i32 %0, 1
    195   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    196   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    197   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the 64-bit index so it can be compared with the i32 bound
    198   %exitcond = icmp eq i32 %lftr.wideiv, %b ; exit once the incremented index equals %b
    199   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9 ; back edge tagged with loop node !9
    200 
    201 for.end:                                          ; preds = %for.body, %entry
    202   ret void
    203 }
    204 !9 = !{!9, !6} ; loop node: first operand refers to itself, second operand is !6 ("llvm.loop.unroll.count", i32 4, defined earlier in this file)
    205 
    206 ; #pragma clang loop unroll_count(1)
    207 ; Loop should not be unrolled
    208 ;
    209 ; CHECK-LABEL: @unroll_1(
    210 ; CHECK: store i32
    211 ; CHECK-NOT: store i32
    212 ; CHECK: br i1
    213 define void @unroll_1(i32* nocapture %a, i32 %b) { ; 4-iteration loop whose back edge carries an unroll.count of 1; %b is not referenced in the body
    214 entry:
    215   br label %for.body
    216 
    217 for.body:                                         ; preds = %for.body, %entry
    218   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
    219   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    220   %0 = load i32, i32* %arrayidx, align 4
    221   %inc = add nsw i32 %0, 1
    222   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    223   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    224   %exitcond = icmp eq i64 %indvars.iv.next, 4 ; constant bound: body executes for index 0..3
    225   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10 ; back edge tagged with loop node !10
    226 
    227 for.end:                                          ; preds = %for.body
    228   ret void
    229 }
    230 !10 = !{!10, !11} ; loop node: first operand refers to itself, second operand is the hint !11
    231 !11 = !{!"llvm.loop.unroll.count", i32 1} ; hint corresponding to unroll_count(1)
    232 
    233 ; #pragma clang loop unroll(full)
    234 ; Loop has very high loop count (1 million) and full unrolling was requested.
    235 ; Loop should be unrolled up to the pragma threshold, but not completely.
    236 ;
    237 ; CHECK-LABEL: @unroll_1M(
    238 ; CHECK: store i32
    239 ; CHECK: store i32
    240 ; CHECK: br i1
    241 define void @unroll_1M(i32* nocapture %a, i32 %b) { ; 1000000-iteration loop whose back edge carries unroll.full metadata; %b is not referenced in the body
    242 entry:
    243   br label %for.body
    244 
    245 for.body:                                         ; preds = %for.body, %entry
    246   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
    247   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    248   %0 = load i32, i32* %arrayidx, align 4
    249   %inc = add nsw i32 %0, 1
    250   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    251   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    252   %exitcond = icmp eq i64 %indvars.iv.next, 1000000 ; constant bound: body executes for index 0..999999
    253   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12 ; back edge tagged with loop node !12
    254 
    255 for.end:                                          ; preds = %for.body
    256   ret void
    257 }
    258 !12 = !{!12, !4} ; loop node: first operand refers to itself, second operand is !4 ("llvm.loop.unroll.full", defined earlier in this file)
    259 
    260 ; #pragma clang loop unroll(enable)
    261 ; Loop should be fully unrolled.
    262 ;
    263 ; CHECK-LABEL: @loop64_with_enable(
    264 ; CHECK-NOT: br i1
    265 define void @loop64_with_enable(i32* nocapture %a) { ; same body as @loop64, but the back edge carries unroll.enable metadata
    266 entry:
    267   br label %for.body
    268 
    269 for.body:                                         ; preds = %for.body, %entry
    270   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; index: 0 on entry, previous index + 1 on the back edge
    271   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    272   %0 = load i32, i32* %arrayidx, align 4
    273   %inc = add nsw i32 %0, 1
    274   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    275   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    276   %exitcond = icmp eq i64 %indvars.iv.next, 64 ; constant bound: body executes for index 0..63
    277   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13 ; back edge tagged with loop node !13
    278 
    279 for.end:                                          ; preds = %for.body
    280   ret void
    281 }
    282 !13 = !{!13, !14} ; loop node: first operand refers to itself, second operand is the hint !14
    283 !14 = !{!"llvm.loop.unroll.enable"} ; hint corresponding to unroll(enable); also referenced by !15 later in this file
    284 
    285 ; #pragma clang loop unroll(enable)
    286 ; Loop has a runtime trip count and should be runtime unrolled and duplicated
    287 ; (original and 8x).
    288 ;
    289 ; CHECK-LABEL: @runtime_loop_with_enable(
    290 ; CHECK: for.body.prol:
    291 ; CHECK: store
    292 ; CHECK-NOT: store
    293 ; CHECK: br i1
    294 ; CHECK: for.body:
    295 ; CHECK: store i32
    296 ; CHECK: store i32
    297 ; CHECK: store i32
    298 ; CHECK: store i32
    299 ; CHECK: store i32
    300 ; CHECK: store i32
    301 ; CHECK: store i32
    302 ; CHECK: store i32
    303 ; CHECK-NOT: store i32
    304 ; CHECK: br i1
    305 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) { ; unroll.enable metadata on a loop whose bound is the runtime argument %b
    306 entry:
    307   %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is skipped entirely unless %b > 0
    308   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15 ; uses this function's own loop node (was !8, the unroll.full node of @runtime_loop_with_full)
    309 
    310 for.body:                                         ; preds = %entry, %for.body
    311   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; index: 0 on entry, previous index + 1 on the back edge
    312   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
    313   %0 = load i32, i32* %arrayidx, align 4
    314   %inc = add nsw i32 %0, 1
    315   store i32 %inc, i32* %arrayidx, align 4 ; a[index] += 1; the only store in the loop body
    316   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    317   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the 64-bit index so it can be compared with the i32 bound
    318   %exitcond = icmp eq i32 %lftr.wideiv, %b ; exit once the incremented index equals %b
    319   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 ; back edge tagged with loop node !15
    320 
    321 for.end:                                          ; preds = %for.body, %entry
    322   ret void
    323 }
    324 !15 = !{!15, !14} ; loop node: first operand refers to itself, second operand is !14 ("llvm.loop.unroll.enable", defined earlier in this file)
    325