; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
; Regression tests for the loop-idiom pass: each function below is a small loop
; whose expected outcome (memset / memcpy / memset_pattern formation, or no
; transformation at all) is pinned by the FileCheck directives inside it.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"

; Simplest case: a loop storing the byte 0 to Base[0 .. Size-1] is expected to
; become a single memset of %Size bytes.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test1
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us.
; Same zeroing loop as a plain byte-store loop, except that the exit test lives
; in a separate latch block (%for.body.cont); a memset is still expected.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK: @test1a
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}


; The stored i32 16843009 is 0x01010101, i.e. every byte is 1, so the loop is
; expected to become a memset of byte value 1 over Size * 4 bytes.  The loop is
; guarded by a Size == 0 test, which must survive the transformation.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK: @test2
; CHECK: br i1 %cmp10,
; CHECK: %0 = mul i64 %Size, 4
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, so we
; can't promote the memset.
; Negative test: the second store goes through %MayAlias, an unrelated pointer
; argument, so no memset may be formed.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4

  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test3
; CHECK-NOT: memset
; CHECK: ret void
}


;; TODO: We should be able to promote this memset.  Not yet though.
;; The directives in this function use the CHECK-TODO prefix, which the
;; FileCheck invocation at the top of the file does not enable, so they
;; document the desired result without being enforced.
define void @test4(i8* %Base) nounwind ssp {
bb.nph:                                           ; entry block (no predecessors)
  %Base100 = getelementptr i8* %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1

  ;; Store beyond the range memset, should be safe to promote.
  store i8 42, i8* %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-TODO: @test4
; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
; CHECK-TODO-NOT: store
}

; This can't be promoted: the memset is a store of a loop variant value.
; Negative test: the stored byte %V is the truncated induction variable, so it
; changes every iteration and no memset may be formed.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar

  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test5
; CHECK-NOT: memset
; CHECK: ret void
}


;; memcpy formation
;; A byte-wise copy between two distinct allocas is expected to become a single
;; memcpy of %Size bytes.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  %DestI = getelementptr i8* %Dest, i64 %indvar
  %V = load i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test6
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}


; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us.
; Two-block loop body: the header only holds the phi, while the store and the
; exit test live in the latch block %for.body.cont.  A memset is still expected.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK: @test7
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration (the exit
; test compares the incremented induction variable against the constant 1).
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test8
; CHECK: store i64 0, i64* %PI
}

; Opaque external function; @test9 passes %Base to it and stores through the
; returned pointer.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
; Negative memcpy test: %BaseAlias is returned by an opaque call that was given
; %Base, and the loop stores through it, so the copy source may be clobbered.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  %DestI = getelementptr i8* %Dest, i64 %indvar
  %V = load i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test9
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}

; Two dimensional nested loop should be promoted to one big memset.
; The inner loop zeroes X[i*100 + j] for j in [0, 100) and the outer loop runs
; i over [0, 100), so the expected memset covers 100 * 100 = 10000 bytes.
define void @test10(i8* %X) nounwind ssp {
entry:
  br label %bb.nph

bb.nph:                                           ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:                                        ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:                                        ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:                                        ; preds = %for.inc10
  ret void
; CHECK: @test10
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call.
; rdar://9009151
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32* %P, i64 %indvar
  store i32 1, i32* %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test11_pattern
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; Store of null should turn into memset of zero.
; 10000 pointer-sized (8-byte, per the datalayout) stores give the expected
; length of 80000 bytes.
define void @test12(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32** %P, i64 %indvar
  store i32* null, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test12
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call.
; rdar://9009151
; Every iteration stores the same loop-invariant pointer constant (the address
; of the global @G) into consecutive slots of %P.
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32** %P, i64 %indvar
  store i32* @G, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test13_pattern
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}



; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
; The loop reads g_50[i + 4] and writes g_50[i + 5] for i = 0, 1, so the value
; written in the first iteration is the one read in the second.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

define i32 @test14() nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %tmp5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
  %tmp2 = load i32* %arrayidx, align 4
  %add4 = add nsw i32 %tmp5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %tmp2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %tmp5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %tmp8
; CHECK: @test14
; CHECK: for.body:
; CHECK: load i32
; CHECK: store i32
; CHECK: br i1 %cmp

}