; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics.  It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 4; no loop metadata on the latch.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Same body and trip count as loop4, but the latch carries loop ID !1.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID for loop4_with_disable: self-referencing node plus the
; unroll.disable hint.
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic.  It serves as the control for the
; unroll(full) pragma tests (loop64_with_*) below.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 64; no loop metadata on the latch.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Same loop as loop64, but the latch carries loop ID !3 (unroll.full).
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID !3 is shared with loop64_with_full_optsize; !4 is the
; unroll.full hint reused by later loop IDs in this file.
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled, even for optsize.
;
; CHECK-LABEL: @loop64_with_full_optsize(
; CHECK-NOT: br i1
define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 64.  The latch reuses loop ID !3, which is
  ; defined next to loop64_with_full.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 64; the latch carries loop ID !5.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID for loop64_with_count4; !6 is the unroll.count hint with a
; count of 4.
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is skipped entirely unless %b > 0.
  ; NOTE(review): this guard branch also carries the loop ID; presumably
  ; only the attachment on the latch branch below matters -- confirm
  ; against the LangRef before relying on it.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit bound is the runtime argument %b, not a constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID for runtime_loop_with_full; reuses !4 (the unroll.full hint
; defined next to loop64_with_full).
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is skipped entirely unless %b > 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit bound is the runtime argument %b, not a constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID for runtime_loop_with_count4; reuses !6 (the unroll.count hint
; with a count of 4, defined next to loop64_with_count4).
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 4 (%b is unused); the latch carries loop ID !10.
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID for unroll_1; !11 is the unroll.count hint with a count of 1.
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 1000000 (%b is unused); the latch carries
  ; loop ID !12.
  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID for unroll_1M; reuses !4 (the unroll.full hint defined next to
; loop64_with_full).
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count of 64; the latch carries loop ID !13.
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID for loop64_with_enable; !14 is the unroll.enable hint, also
; referenced by !15 further down.
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is skipped entirely unless %b > 0.
  ; NOTE(review): this guard branch references !8 (the unroll.full loop ID
  ; defined for runtime_loop_with_full) while the latch below uses !15
  ; (unroll.enable).  Looks like a copy/paste leftover; presumably harmless
  ; because the guard is not the loop latch -- confirm before changing.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit bound is the runtime argument %b, not a constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID for runtime_loop_with_enable; reuses !14 (the unroll.enable hint
; defined next to loop64_with_enable).
!15 = !{!15, !14}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body.epil:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop is skipped entirely unless %b > 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  ; a[i] += 1
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit bound is the runtime argument %b, not a constant.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop ID for runtime_loop_with_count3; !17 is the unroll.count hint with
; a count of 3.
!16 = !{!16, !17}
!17 = !{!"llvm.loop.unroll.count", i32 3}