Home | History | Annotate | Download | only in LoopUnroll
      1 ; RUN: opt < %s -loop-unroll -S | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      4 target triple = "x86_64-unknown-linux-gnu"        ; every function below is compiled for this x86-64 Linux triple
      5 
      6 ; loop4 contains a small loop which should be completely unrolled by
      7 ; the default unrolling heuristics.  It serves as a control for the
      8 ; unroll(disable) pragma test loop4_with_disable.
      9 ;
     10 ; CHECK-LABEL: @loop4(
     11 ; CHECK-NOT: br i1
     12 define void @loop4(i32* nocapture %a) {
     13 entry:
     14   br label %loop
     15 
     16 loop:                                             ; preds = %loop, %entry
     17   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
     18   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
     19   %old = load i32* %slot, align 4
     20   %new = add nsw i32 %old, 1                        ; a[i] + 1
     21   store i32 %new, i32* %slot, align 4               ; one store per iteration
     22   %i.next = add nuw nsw i64 %i, 1
     23   %done = icmp eq i64 %i.next, 4                    ; constant trip count of 4
     24   br i1 %done, label %exit, label %loop             ; latch branch, no loop metadata
     25 
     26 exit:                                             ; preds = %loop
     27   ret void
     28 }
     29 
     30 ; #pragma clang loop unroll(disable)
     31 ;
     32 ; CHECK-LABEL: @loop4_with_disable(
     33 ; CHECK: store i32
     34 ; CHECK-NOT: store i32
     35 ; CHECK: br i1
     36 define void @loop4_with_disable(i32* nocapture %a) {
     37 entry:
     38   br label %loop
     39 
     40 loop:                                             ; preds = %loop, %entry
     41   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
     42   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
     43   %old = load i32* %slot, align 4
     44   %new = add nsw i32 %old, 1                        ; a[i] + 1
     45   store i32 %new, i32* %slot, align 4               ; one store per iteration
     46   %i.next = add nuw nsw i64 %i, 1
     47   %done = icmp eq i64 %i.next, 4                    ; same constant trip count (4) as @loop4
     48   br i1 %done, label %exit, label %loop, !llvm.loop !1   ; latch carries the loop ID !1
     49 
     50 exit:                                             ; preds = %loop
     51   ret void
     52 }
     53 !1 = metadata !{metadata !1, metadata !2}         ; loop ID: first operand refers to itself, then the hint !2
     54 !2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}   ; unroll.enable set to false
     55 
     56 ; loop64 has a high enough count that it should *not* be unrolled by
     57 ; the default unrolling heuristic.  It serves as the control for the
     58 ; unroll(enable) and unroll_count pragma tests (loop64_with_.*) below.
     59 ;
     60 ; CHECK-LABEL: @loop64(
     61 ; CHECK: store i32
     62 ; CHECK-NOT: store i32
     63 ; CHECK: br i1
     64 define void @loop64(i32* nocapture %a) {
     65 entry:
     66   br label %loop
     67 
     68 loop:                                             ; preds = %loop, %entry
     69   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
     70   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
     71   %old = load i32* %slot, align 4
     72   %new = add nsw i32 %old, 1                        ; a[i] + 1
     73   store i32 %new, i32* %slot, align 4               ; one store per iteration
     74   %i.next = add nuw nsw i64 %i, 1
     75   %done = icmp eq i64 %i.next, 64                   ; constant trip count of 64
     76   br i1 %done, label %exit, label %loop             ; latch branch, no loop metadata
     77 
     78 exit:                                             ; preds = %loop
     79   ret void
     80 }
     81 
     82 ; #pragma clang loop unroll(enable)
     83 ; Loop should be fully unrolled.
     84 ;
     85 ; CHECK-LABEL: @loop64_with_enable(
     86 ; CHECK-NOT: br i1
     87 define void @loop64_with_enable(i32* nocapture %a) {
     88 entry:
     89   br label %loop
     90 
     91 loop:                                             ; preds = %loop, %entry
     92   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
     93   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
     94   %old = load i32* %slot, align 4
     95   %new = add nsw i32 %old, 1                        ; a[i] + 1
     96   store i32 %new, i32* %slot, align 4               ; one store per iteration
     97   %i.next = add nuw nsw i64 %i, 1
     98   %done = icmp eq i64 %i.next, 64                   ; same constant trip count (64) as @loop64
     99   br i1 %done, label %exit, label %loop, !llvm.loop !3   ; latch carries the loop ID !3
    100 
    101 exit:                                             ; preds = %loop
    102   ret void
    103 }
    104 !3 = metadata !{metadata !3, metadata !4}         ; loop ID: first operand refers to itself, then the hint !4
    105 !4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}   ; unroll.enable set to true; also referenced by !7, !8 and !12
    106 
    107 ; #pragma clang loop unroll_count(4)
    108 ; Loop should be unrolled 4 times.
    109 ;
    110 ; CHECK-LABEL: @loop64_with_count4(
    111 ; CHECK: store i32
    112 ; CHECK: store i32
    113 ; CHECK: store i32
    114 ; CHECK: store i32
    115 ; CHECK-NOT: store i32
    116 ; CHECK: br i1
    117 define void @loop64_with_count4(i32* nocapture %a) {
    118 entry:
    119   br label %loop
    120 
    121 loop:                                             ; preds = %loop, %entry
    122   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
    123   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
    124   %old = load i32* %slot, align 4
    125   %new = add nsw i32 %old, 1                        ; a[i] + 1
    126   store i32 %new, i32* %slot, align 4               ; one store per iteration
    127   %i.next = add nuw nsw i64 %i, 1
    128   %done = icmp eq i64 %i.next, 64                   ; constant trip count of 64
    129   br i1 %done, label %exit, label %loop, !llvm.loop !5   ; latch carries the loop ID !5
    130 
    131 exit:                                             ; preds = %loop
    132   ret void
    133 }
    134 !5 = metadata !{metadata !5, metadata !6}         ; loop ID: first operand refers to itself, then the hint !6
    135 !6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}   ; unroll.count of 4; also referenced by !7 and !9
    136 
    137 
    138 ; #pragma clang loop unroll(enable) unroll_count(4)
    139 ; Loop should be unrolled 4 times.
    140 ;
    141 ; CHECK-LABEL: @loop64_with_enable_and_count4(
    142 ; CHECK: store i32
    143 ; CHECK: store i32
    144 ; CHECK: store i32
    145 ; CHECK: store i32
    146 ; CHECK-NOT: store i32
    147 ; CHECK: br i1
    148 define void @loop64_with_enable_and_count4(i32* nocapture %a) {
    149 entry:
    150   br label %loop
    151 
    152 loop:                                             ; preds = %loop, %entry
    153   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
    154   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
    155   %old = load i32* %slot, align 4
    156   %new = add nsw i32 %old, 1                        ; a[i] + 1
    157   store i32 %new, i32* %slot, align 4               ; one store per iteration
    158   %i.next = add nuw nsw i64 %i, 1
    159   %done = icmp eq i64 %i.next, 64                   ; constant trip count of 64
    160   br i1 %done, label %exit, label %loop, !llvm.loop !7   ; latch carries the loop ID !7
    161 
    162 exit:                                             ; preds = %loop
    163   ret void
    164 }
    165 !7 = metadata !{metadata !7, metadata !6, metadata !4}   ; loop ID combining the count hint !6 and the enable hint !4
    166 
    167 ; #pragma clang loop unroll(enable)
    168 ; Full unrolling is requested, but loop has a dynamic trip count so
    169 ; no unrolling should occur.
    170 ;
    171 ; CHECK-LABEL: @dynamic_loop_with_enable(
    172 ; CHECK: store i32
    173 ; CHECK-NOT: store i32
    174 ; CHECK: br i1
    175 define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) {
    176 entry:
    177   %cmp3 = icmp sgt i32 %b, 0                        ; guard: the loop is skipped when %b <= 0
    178   br i1 %cmp3, label %for.body, label %for.end      ; guard branch, not a loop latch, so it carries no !llvm.loop
    179 
    180 for.body:                                         ; preds = %entry, %for.body
    181   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    182   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv   ; &a[i]
    183   %0 = load i32* %arrayidx, align 4
    184   %inc = add nsw i32 %0, 1                          ; a[i] + 1
    185   store i32 %inc, i32* %arrayidx, align 4           ; one store per iteration
    186   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    187   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the 64-bit index to compare against i32 %b
    188   %exitcond = icmp eq i32 %lftr.wideiv, %b          ; exit bound is the argument %b, not a constant
    189   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8   ; latch carries the loop ID !8
    190 
    191 for.end:                                          ; preds = %for.body, %entry
    192   ret void
    193 }
    194 !8 = metadata !{metadata !8, metadata !4}         ; loop ID: first operand refers to itself, then the enable hint !4
    195 
    196 ; #pragma clang loop unroll_count(4)
    197 ; Loop has a dynamic trip count.  Unrolling should occur, but no
    198 ; conditional branches can be removed.
    199 ;
    200 ; CHECK-LABEL: @dynamic_loop_with_count4(
    201 ; CHECK-NOT: store
    202 ; CHECK: br i1
    203 ; CHECK: store
    204 ; CHECK: br i1
    205 ; CHECK: store
    206 ; CHECK: br i1
    207 ; CHECK: store
    208 ; CHECK: br i1
    209 ; CHECK: store
    210 ; CHECK: br i1
    211 ; CHECK-NOT: br i1
    212 define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) {
    213 entry:
    214   %cmp3 = icmp sgt i32 %b, 0                        ; guard: the loop is skipped when %b <= 0
    215   br i1 %cmp3, label %for.body, label %for.end      ; guard branch, not a loop latch, so it carries no !llvm.loop
    216 
    217 for.body:                                         ; preds = %entry, %for.body
    218   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    219   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv   ; &a[i]
    220   %0 = load i32* %arrayidx, align 4
    221   %inc = add nsw i32 %0, 1                          ; a[i] + 1
    222   store i32 %inc, i32* %arrayidx, align 4           ; one store per iteration
    223   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    224   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the 64-bit index to compare against i32 %b
    225   %exitcond = icmp eq i32 %lftr.wideiv, %b          ; exit bound is the argument %b, not a constant
    226   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9   ; latch carries the loop ID !9
    227 
    228 for.end:                                          ; preds = %for.body, %entry
    229   ret void
    230 }
    231 !9 = metadata !{metadata !9, metadata !6}         ; loop ID: first operand refers to itself, then the count hint !6
    232 
    233 ; #pragma clang loop unroll_count(1)
    234 ; Loop should not be unrolled
    235 ;
    236 ; CHECK-LABEL: @unroll_1(
    237 ; CHECK: store i32
    238 ; CHECK-NOT: store i32
    239 ; CHECK: br i1
    240 define void @unroll_1(i32* nocapture %a, i32 %b) {
    241 entry:
    242   br label %loop
    243 
    244 loop:                                             ; preds = %loop, %entry
    245   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
    246   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
    247   %old = load i32* %slot, align 4
    248   %new = add nsw i32 %old, 1                        ; a[i] + 1
    249   store i32 %new, i32* %slot, align 4               ; one store per iteration
    250   %i.next = add nuw nsw i64 %i, 1
    251   %done = icmp eq i64 %i.next, 4                    ; constant trip count of 4; %b is not read
    252   br i1 %done, label %exit, label %loop, !llvm.loop !10   ; latch carries the loop ID !10
    253 
    254 exit:                                             ; preds = %loop
    255   ret void
    256 }
    257 !10 = metadata !{metadata !10, metadata !11}      ; loop ID: first operand refers to itself, then the hint !11
    258 !11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}   ; unroll.count of 1
    259 
    260 ; #pragma clang loop unroll(enable)
    261 ; Loop has very high loop count (1 million) and full unrolling was requested.
    262 ; Loop should be unrolled up to the pragma threshold, but not completely.
    263 ;
    264 ; CHECK-LABEL: @unroll_1M(
    265 ; CHECK: store i32
    266 ; CHECK: store i32
    267 ; CHECK: br i1
    268 define void @unroll_1M(i32* nocapture %a, i32 %b) {
    269 entry:
    270   br label %loop
    271 
    272 loop:                                             ; preds = %loop, %entry
    273   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]    ; element index, starts at 0
    274   %slot = getelementptr inbounds i32* %a, i64 %i    ; &a[i]
    275   %old = load i32* %slot, align 4
    276   %new = add nsw i32 %old, 1                        ; a[i] + 1
    277   store i32 %new, i32* %slot, align 4               ; one store per iteration
    278   %i.next = add nuw nsw i64 %i, 1
    279   %done = icmp eq i64 %i.next, 1000000              ; constant trip count of 1,000,000; %b is not read
    280   br i1 %done, label %exit, label %loop, !llvm.loop !12   ; latch carries the loop ID !12
    281 
    282 exit:                                             ; preds = %loop
    283   ret void
    284 }
    285 !12 = metadata !{metadata !12, metadata !4}       ; loop ID: first operand refers to itself, then the enable hint !4
    286