; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime < %s -S | FileCheck %s
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-and-jam-threshold=15 < %s -S | FileCheck %s --check-prefix=CHECK-LOWTHRES

; Exercises how the loop-unroll-and-jam pass reacts to the loop metadata
; attached to the outer-loop latch branch. Every function below contains the
; same two-level loop nest: the inner loop accumulates B[0..J) into a sum and
; the outer latch stores that sum to A[i]. Only the !llvm.loop metadata on the
; outer latch differs between functions.
;
; The expectations look at the outer induction phi %i.us:
;   * incoming value %add8.us.<N> from a ".new" preheader -> the nest is
;     expected to have been unrolled and jammed;
;   * incoming value %add8.us from the original preheader -> the nest is
;     expected to be left untouched.
;
; The second invocation lowers -unroll-and-jam-threshold to 15 and uses its own
; directive prefix.
; NOTE(review): the low-threshold directives are not anchored by per-function
; labels, so they are matched in file order only and the second one may be
; satisfiable by an earlier function than the one it sits in - consider adding
; label anchors for that prefix.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; CHECK-LABEL: test1
; Basic check that these loops are by default UnJ'd
define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: nounroll_and_jam
; #pragma nounroll_and_jam
define void @nounroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !1

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: unroll_and_jam_count
; #pragma unroll_and_jam(8)
define void @unroll_and_jam_count(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.7, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !3

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: unroll_and_jam
; #pragma unroll_and_jam
define void @unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !5

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: nounroll
; #pragma nounroll (which we take to mean disable unroll and jam too)
define void @nounroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !7

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: unroll
; #pragma unroll (which we take to mean disable unroll and jam)
define void @unroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !9

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: nounroll_plus_unroll_and_jam
; #pragma clang loop nounroll, unroll_and_jam (which we take to mean do unroll_and_jam)
define void @nounroll_plus_unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !11

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; Loop metadata referenced from the outer-loop latch branches above.
; !1  (@nounroll_and_jam): unroll_and_jam.disable
!1 = distinct !{!1, !2}
!2 = distinct !{!"llvm.loop.unroll_and_jam.disable"}
; !3  (@unroll_and_jam_count): unroll_and_jam.count = 8
!3 = distinct !{!3, !4}
!4 = distinct !{!"llvm.loop.unroll_and_jam.count", i32 8}
; !5  (@unroll_and_jam): unroll_and_jam.enable
!5 = distinct !{!5, !6}
!6 = distinct !{!"llvm.loop.unroll_and_jam.enable"}
; !7  (@nounroll): unroll.disable
!7 = distinct !{!7, !8}
!8 = distinct !{!"llvm.loop.unroll.disable"}
; !9  (@unroll): unroll.enable
!9 = distinct !{!9, !10}
!10 = distinct !{!"llvm.loop.unroll.enable"}
; !11 (@nounroll_plus_unroll_and_jam): unroll.disable combined with
; unroll_and_jam.enable (reuses !8 and !6)
!11 = distinct !{!11, !8, !6}