; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.
;
; Every function below wraps the same loop body (load a[i], add 1, store a[i]).
; The functions differ only in the trip count and in the !llvm.loop metadata
; attached to the loop branch, so the number of "store i32" instructions left
; in the output shows how many times the body was replicated.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics. It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
; Same loop as loop4, but the disable pragma should keep it rolled: exactly
; one store and the conditional branch must remain.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic. It serves as the control for the
; unroll(full), unroll_count and unroll(enable) pragma tests
; (loop64_with_.*) below.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !8 reuses the llvm.loop.unroll.full node !4 defined for loop64_with_full.
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
;
; The label pattern below includes the trailing colon so that it matches the
; unrolled loop's block label rather than the %for.body.prol operand of the
; preceding branch.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body:
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !9 reuses the llvm.loop.unroll.count(4) node !6 defined for loop64_with_count4.
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
; !12 reuses the llvm.loop.unroll.full node !4 defined for loop64_with_full.
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
;
; Both the guard branch in entry and the latch branch carry this function's
; own unroll(enable) node (!15), mirroring runtime_loop_with_count4.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !15 reuses the llvm.loop.unroll.enable node !14 defined for loop64_with_enable.
!15 = !{!15, !14}