; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
;
; The second invocation schedules the unroller twice. This verifies that the
; unrolling metadata is properly removed by the first run and that the loop
; is not unrolled any further by the second one.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Control case for @loop4_with_disable: a small four-iteration loop that the
; default unrolling heuristics should unroll completely, leaving no
; conditional branch behind.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 4
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
; Same loop as @loop4, but the latch carries llvm.loop.unroll.disable, so a
; single store and the loop branch are expected to survive.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; Control case for the loop64_with_* tests below: with 64 iterations the trip
; count is high enough that the default heuristic should *not* unroll the
; loop.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; The 64-iteration loop should be unrolled by a factor of four: four stores
; followed by the loop branch.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop body is skipped entirely unless %b is positive.
  %nonempty = icmp sgt i32 %b, 0
  br i1 %nonempty, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  ; The exit test compares against the argument %b, so the trip count is
  ; not a compile-time constant.
  %iv.next.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.next.trunc, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled) if remainder is allowed,
; otherwise loop should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
; REM: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Guard: the loop body is skipped entirely unless %b is positive.
  %nonempty = icmp sgt i32 %b, 0
  br i1 %nonempty, label %for.body, label %for.end, !llvm.loop !9

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %iv.next.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.next.trunc, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; An unroll count of one means the loop should not be unrolled, even though
; its constant trip count is only four.
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has a very high trip count (1 million) and full unrolling was
; requested. Loop should be unrolled up to the pragma threshold, but not
; completely.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %elt.inc = add nsw i32 %elt, 1
  store i32 %elt.inc, i32* %elt.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1000000
  br i1 %done, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x) if remainder is allowed, otherwise it should not be
; unrolled.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; REM: for.body.epil:
; NOREM-NOT: for.body.epil:
; REM: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  ; The guard branch uses this loop's own ID (!15), matching the latch below
  ; and the other runtime-trip-count tests in this file, which attach a single
  ; loop ID to both branches.
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares against the argument %b, so the trip count is
  ; only known at run time.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!15 = !{!15, !14}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled) if remainder is allowed,
; otherwise it should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; REM: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!16 = !{!16, !17}
!17 = !{!"llvm.loop.unroll.count", i32 3}