1 ; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s 2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" 3 4 ; For @test11_pattern 5 ; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1] 6 7 ; For @test13_pattern 8 ; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G] 9 10 target triple = "x86_64-apple-darwin10.0.0" 11 12 define void @test1(i8* %Base, i64 %Size) nounwind ssp { 13 bb.nph: ; preds = %entry 14 br label %for.body 15 16 for.body: ; preds = %bb.nph, %for.body 17 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 18 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 19 store i8 0, i8* %I.0.014, align 1 20 %indvar.next = add i64 %indvar, 1 21 %exitcond = icmp eq i64 %indvar.next, %Size 22 br i1 %exitcond, label %for.end, label %for.body 23 24 for.end: ; preds = %for.body, %entry 25 ret void 26 ; CHECK-LABEL: @test1( 27 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 28 ; CHECK-NOT: store 29 } 30 31 ; This is a loop that was rotated but where the blocks weren't merged. This 32 ; shouldn't perturb us. 33 define void @test1a(i8* %Base, i64 %Size) nounwind ssp { 34 bb.nph: ; preds = %entry 35 br label %for.body 36 37 for.body: ; preds = %bb.nph, %for.body 38 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] 39 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 40 store i8 0, i8* %I.0.014, align 1 41 %indvar.next = add i64 %indvar, 1 42 br label %for.body.cont 43 for.body.cont: 44 %exitcond = icmp eq i64 %indvar.next, %Size 45 br i1 %exitcond, label %for.end, label %for.body 46 47 for.end: ; preds = %for.body, %entry 48 ret void 49 ; CHECK-LABEL: @test1a( 50 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 51 ; CHECK-NOT: store 52 } 53 54 55 define void @test2(i32* %Base, i64 %Size) nounwind ssp { 56 entry: 57 %cmp10 = icmp eq i64 %Size, 0 58 br i1 %cmp10, label %for.end, label %for.body 59 60 for.body: ; preds = %entry, %for.body 61 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ] 62 %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011 63 store i32 16843009, i32* %add.ptr.i, align 4 64 %inc = add nsw i64 %i.011, 1 65 %exitcond = icmp eq i64 %inc, %Size 66 br i1 %exitcond, label %for.end, label %for.body 67 68 for.end: ; preds = %for.body, %entry 69 ret void 70 ; CHECK-LABEL: @test2( 71 ; CHECK: br i1 %cmp10, 72 ; CHECK: %0 = shl i64 %Size, 2 73 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false) 74 ; CHECK-NOT: store 75 } 76 77 ; This is a case where there is an extra may-aliased store in the loop, we can't 78 ; promote the memset. 79 define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp { 80 entry: 81 br label %for.body 82 83 for.body: ; preds = %entry, %for.body 84 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ] 85 %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011 86 store i32 16843009, i32* %add.ptr.i, align 4 87 88 store i8 42, i8* %MayAlias 89 %inc = add nsw i64 %i.011, 1 90 %exitcond = icmp eq i64 %inc, %Size 91 br i1 %exitcond, label %for.end, label %for.body 92 93 for.end: ; preds = %entry 94 ret void 95 ; CHECK-LABEL: @test3( 96 ; CHECK-NOT: memset 97 ; CHECK: ret void 98 } 99 100 101 ;; TODO: We should be able to promote this memset. Not yet though. 102 define void @test4(i8* %Base) nounwind ssp { 103 bb.nph: ; preds = %entry 104 %Base100 = getelementptr i8, i8* %Base, i64 1000 105 br label %for.body 106 107 for.body: ; preds = %bb.nph, %for.body 108 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 109 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 110 store i8 0, i8* %I.0.014, align 1 111 112 ;; Store beyond the range memset, should be safe to promote. 113 store i8 42, i8* %Base100 114 115 %indvar.next = add i64 %indvar, 1 116 %exitcond = icmp eq i64 %indvar.next, 100 117 br i1 %exitcond, label %for.end, label %for.body 118 119 for.end: ; preds = %for.body, %entry 120 ret void 121 ; CHECK-TODO-LABEL: @test4( 122 ; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false) 123 ; CHECK-TODO-NOT: store 124 } 125 126 ; This can't be promoted: the memset is a store of a loop variant value. 127 define void @test5(i8* %Base, i64 %Size) nounwind ssp { 128 bb.nph: ; preds = %entry 129 br label %for.body 130 131 for.body: ; preds = %bb.nph, %for.body 132 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 133 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 134 135 %V = trunc i64 %indvar to i8 136 store i8 %V, i8* %I.0.014, align 1 137 %indvar.next = add i64 %indvar, 1 138 %exitcond = icmp eq i64 %indvar.next, %Size 139 br i1 %exitcond, label %for.end, label %for.body 140 141 for.end: ; preds = %for.body, %entry 142 ret void 143 ; CHECK-LABEL: @test5( 144 ; CHECK-NOT: memset 145 ; CHECK: ret void 146 } 147 148 149 ;; memcpy formation 150 define void @test6(i64 %Size) nounwind ssp { 151 bb.nph: 152 %Base = alloca i8, i32 10000 153 %Dest = alloca i8, i32 10000 154 br label %for.body 155 156 for.body: ; preds = %bb.nph, %for.body 157 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 158 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 159 %DestI = getelementptr i8, i8* %Dest, i64 %indvar 160 %V = load i8, i8* %I.0.014, align 1 161 store i8 %V, i8* %DestI, align 1 162 %indvar.next = add i64 %indvar, 1 163 %exitcond = icmp eq i64 %indvar.next, %Size 164 br i1 %exitcond, label %for.end, label %for.body 165 166 for.end: ; preds = %for.body, %entry 167 ret void 168 ; CHECK-LABEL: @test6( 169 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false) 170 ; CHECK-NOT: store 171 ; CHECK: ret void 172 } 173 174 175 ; This is a loop that was rotated but where the blocks weren't merged. This 176 ; shouldn't perturb us. 177 define void @test7(i8* %Base, i64 %Size) nounwind ssp { 178 bb.nph: ; preds = %entry 179 br label %for.body 180 181 for.body: ; preds = %bb.nph, %for.body 182 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] 183 br label %for.body.cont 184 for.body.cont: 185 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 186 store i8 0, i8* %I.0.014, align 1 187 %indvar.next = add i64 %indvar, 1 188 %exitcond = icmp eq i64 %indvar.next, %Size 189 br i1 %exitcond, label %for.end, label %for.body 190 191 for.end: ; preds = %for.body, %entry 192 ret void 193 ; CHECK-LABEL: @test7( 194 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 195 ; CHECK-NOT: store 196 } 197 198 ; This is a loop should not be transformed, it only executes one iteration. 199 define void @test8(i64* %Ptr, i64 %Size) nounwind ssp { 200 bb.nph: ; preds = %entry 201 br label %for.body 202 203 for.body: ; preds = %bb.nph, %for.body 204 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 205 %PI = getelementptr i64, i64* %Ptr, i64 %indvar 206 store i64 0, i64 *%PI 207 %indvar.next = add i64 %indvar, 1 208 %exitcond = icmp eq i64 %indvar.next, 1 209 br i1 %exitcond, label %for.end, label %for.body 210 211 for.end: ; preds = %for.body, %entry 212 ret void 213 ; CHECK-LABEL: @test8( 214 ; CHECK: store i64 0, i64* %PI 215 } 216 217 declare i8* @external(i8*) 218 219 ;; This cannot be transformed into a memcpy, because the read-from location is 220 ;; mutated by the loop. 221 define void @test9(i64 %Size) nounwind ssp { 222 bb.nph: 223 %Base = alloca i8, i32 10000 224 %Dest = alloca i8, i32 10000 225 226 %BaseAlias = call i8* @external(i8* %Base) 227 br label %for.body 228 229 for.body: ; preds = %bb.nph, %for.body 230 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 231 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar 232 %DestI = getelementptr i8, i8* %Dest, i64 %indvar 233 %V = load i8, i8* %I.0.014, align 1 234 store i8 %V, i8* %DestI, align 1 235 236 ;; This store can clobber the input. 237 store i8 4, i8* %BaseAlias 238 239 %indvar.next = add i64 %indvar, 1 240 %exitcond = icmp eq i64 %indvar.next, %Size 241 br i1 %exitcond, label %for.end, label %for.body 242 243 for.end: ; preds = %for.body, %entry 244 ret void 245 ; CHECK-LABEL: @test9( 246 ; CHECK-NOT: llvm.memcpy 247 ; CHECK: ret void 248 } 249 250 ; Two dimensional nested loop should be promoted to one big memset. 251 define void @test10(i8* %X) nounwind ssp { 252 entry: 253 br label %bb.nph 254 255 bb.nph: ; preds = %entry, %for.inc10 256 %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ] 257 br label %for.body5 258 259 for.body5: ; preds = %for.body5, %bb.nph 260 %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ] 261 %mul = mul nsw i32 %i.04, 100 262 %add = add nsw i32 %j.02, %mul 263 %idxprom = sext i32 %add to i64 264 %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom 265 store i8 0, i8* %arrayidx, align 1 266 %inc = add nsw i32 %j.02, 1 267 %cmp4 = icmp eq i32 %inc, 100 268 br i1 %cmp4, label %for.inc10, label %for.body5 269 270 for.inc10: ; preds = %for.body5 271 %inc12 = add nsw i32 %i.04, 1 272 %cmp = icmp eq i32 %inc12, 100 273 br i1 %cmp, label %for.end13, label %bb.nph 274 275 for.end13: ; preds = %for.inc10 276 ret void 277 ; CHECK-LABEL: @test10( 278 ; CHECK: entry: 279 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false) 280 ; CHECK-NOT: store 281 ; CHECK: ret void 282 } 283 284 ; On darwin10 (which is the triple in this .ll file) this loop can be turned 285 ; into a memset_pattern call. 286 ; rdar://9009151 287 define void @test11_pattern(i32* nocapture %P) nounwind ssp { 288 entry: 289 br label %for.body 290 291 for.body: ; preds = %entry, %for.body 292 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 293 %arrayidx = getelementptr i32, i32* %P, i64 %indvar 294 store i32 1, i32* %arrayidx, align 4 295 %indvar.next = add i64 %indvar, 1 296 %exitcond = icmp eq i64 %indvar.next, 10000 297 br i1 %exitcond, label %for.end, label %for.body 298 299 for.end: ; preds = %for.body 300 ret void 301 ; CHECK-LABEL: @test11_pattern( 302 ; CHECK-NEXT: entry: 303 ; CHECK-NEXT: bitcast 304 ; CHECK-NEXT: memset_pattern 305 ; CHECK-NOT: store 306 ; CHECK: ret void 307 } 308 309 ; Store of null should turn into memset of zero. 310 define void @test12(i32** nocapture %P) nounwind ssp { 311 entry: 312 br label %for.body 313 314 for.body: ; preds = %entry, %for.body 315 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 316 %arrayidx = getelementptr i32*, i32** %P, i64 %indvar 317 store i32* null, i32** %arrayidx, align 4 318 %indvar.next = add i64 %indvar, 1 319 %exitcond = icmp eq i64 %indvar.next, 10000 320 br i1 %exitcond, label %for.end, label %for.body 321 322 for.end: ; preds = %for.body 323 ret void 324 ; CHECK-LABEL: @test12( 325 ; CHECK-NEXT: entry: 326 ; CHECK-NEXT: bitcast 327 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false) 328 ; CHECK-NOT: store 329 ; CHECK: ret void 330 } 331 332 @G = global i32 5 333 334 ; This store-of-address loop can be turned into a memset_pattern call. 335 ; rdar://9009151 336 define void @test13_pattern(i32** nocapture %P) nounwind ssp { 337 entry: 338 br label %for.body 339 340 for.body: ; preds = %entry, %for.body 341 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 342 %arrayidx = getelementptr i32*, i32** %P, i64 %indvar 343 store i32* @G, i32** %arrayidx, align 4 344 %indvar.next = add i64 %indvar, 1 345 %exitcond = icmp eq i64 %indvar.next, 10000 346 br i1 %exitcond, label %for.end, label %for.body 347 348 for.end: ; preds = %for.body 349 ret void 350 ; CHECK-LABEL: @test13_pattern( 351 ; CHECK-NEXT: entry: 352 ; CHECK-NEXT: bitcast 353 ; CHECK-NEXT: memset_pattern 354 ; CHECK-NOT: store 355 ; CHECK: ret void 356 } 357 358 359 360 ; PR9815 - This is a partial overlap case that cannot be safely transformed 361 ; into a memcpy. 362 @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16 363 364 define i32 @test14() nounwind { 365 entry: 366 br label %for.body 367 368 for.body: ; preds = %for.inc, %for.body.lr.ph 369 %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ] 370 %add = add nsw i32 %tmp5, 4 371 %idxprom = sext i32 %add to i64 372 %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom 373 %tmp2 = load i32, i32* %arrayidx, align 4 374 %add4 = add nsw i32 %tmp5, 5 375 %idxprom5 = sext i32 %add4 to i64 376 %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5 377 store i32 %tmp2, i32* %arrayidx6, align 4 378 %inc = add nsw i32 %tmp5, 1 379 %cmp = icmp slt i32 %inc, 2 380 br i1 %cmp, label %for.body, label %for.end 381 382 for.end: ; preds = %for.inc 383 %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4 384 ret i32 %tmp8 385 ; CHECK-LABEL: @test14( 386 ; CHECK: for.body: 387 ; CHECK: load i32 388 ; CHECK: store i32 389 ; CHECK: br i1 %cmp 390 391 } 392 393 define void @PR14241(i32* %s, i64 %size) { 394 ; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught 395 ; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy 396 ; instead of a memmove. If we get the memmove transform back, this will catch 397 ; regressions. 398 ; 399 ; CHECK-LABEL: @PR14241( 400 401 entry: 402 %end.idx = add i64 %size, -1 403 %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx 404 br label %while.body 405 ; CHECK-NOT: memcpy 406 ; 407 ; FIXME: When we regain the ability to form a memmove here, this test should be 408 ; reversed and turned into a positive assertion. 409 ; CHECK-NOT: memmove 410 411 while.body: 412 %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ] 413 %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1 414 %val = load i32, i32* %src.ptr, align 4 415 ; CHECK: load 416 %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0 417 store i32 %val, i32* %dst.ptr, align 4 418 ; CHECK: store 419 %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1 420 %cmp = icmp eq i32* %next.ptr, %end.ptr 421 br i1 %cmp, label %exit, label %while.body 422 423 exit: 424 ret void 425 ; CHECK: ret void 426 } 427 428 ; Recognize loops with a negative stride. 429 define void @test15(i32* nocapture %f) { 430 entry: 431 br label %for.body 432 433 for.body: 434 %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ] 435 %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv 436 store i32 0, i32* %arrayidx, align 4 437 %indvars.iv.next = add nsw i64 %indvars.iv, -1 438 %cmp = icmp sgt i64 %indvars.iv, 0 439 br i1 %cmp, label %for.body, label %for.cond.cleanup 440 441 for.cond.cleanup: 442 ret void 443 ; CHECK-LABEL: @test15( 444 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false) 445 ; CHECK-NOT: store 446 ; CHECK: ret void 447 } 448 449 ; Loop with a negative stride. Verify an aliasing write to f[65536] prevents 450 ; the creation of a memset. 451 define void @test16(i32* nocapture %f) { 452 entry: 453 %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536 454 br label %for.body 455 456 for.body: ; preds = %entry, %for.body 457 %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ] 458 %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv 459 store i32 0, i32* %arrayidx, align 4 460 store i32 1, i32* %arrayidx1, align 4 461 %indvars.iv.next = add nsw i64 %indvars.iv, -1 462 %cmp = icmp sgt i64 %indvars.iv, 0 463 br i1 %cmp, label %for.body, label %for.cond.cleanup 464 465 for.cond.cleanup: ; preds = %for.body 466 ret void 467 ; CHECK-LABEL: @test16( 468 ; CHECK-NOT: call void @llvm.memset.p0i8.i64 469 ; CHECK: ret void 470 } 471 472 ; Handle memcpy-able loops with negative stride. 473 define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) { 474 entry: 475 %conv = sext i32 %c to i64 476 %mul = shl nsw i64 %conv, 2 477 %call = tail call noalias i8* @malloc(i64 %mul) 478 %0 = bitcast i8* %call to i32* 479 %tobool.9 = icmp eq i32 %c, 0 480 br i1 %tobool.9, label %while.end, label %while.body.preheader 481 482 while.body.preheader: ; preds = %entry 483 br label %while.body 484 485 while.body: ; preds = %while.body.preheader, %while.body 486 %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ] 487 %dec10 = add nsw i32 %dec10.in, -1 488 %idxprom = sext i32 %dec10 to i64 489 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom 490 %1 = load i32, i32* %arrayidx, align 4 491 %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom 492 store i32 %1, i32* %arrayidx2, align 4 493 %tobool = icmp eq i32 %dec10, 0 494 br i1 %tobool, label %while.end.loopexit, label %while.body 495 496 while.end.loopexit: ; preds = %while.body 497 br label %while.end 498 499 while.end: ; preds = %while.end.loopexit, %entry 500 ret i32* %0 501 ; CHECK-LABEL: @test17( 502 ; CHECK: call void @llvm.memcpy 503 ; CHECK: ret i32* 504 } 505 506 declare noalias i8* @malloc(i64) 507 508 ; Handle memcpy-able loops with negative stride. 509 ; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) { 510 ; for (int i = 2047; i >= 0; --i) { 511 ; a[i] = b[i]; 512 ; } 513 ; } 514 define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 { 515 entry: 516 br label %for.body 517 518 for.body: ; preds = %entry, %for.body 519 %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ] 520 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 521 %0 = load i32, i32* %arrayidx, align 4 522 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 523 store i32 %0, i32* %arrayidx2, align 4 524 %indvars.iv.next = add nsw i64 %indvars.iv, -1 525 %cmp = icmp sgt i64 %indvars.iv, 0 526 br i1 %cmp, label %for.body, label %for.cond.cleanup 527 528 for.cond.cleanup: ; preds = %for.body 529 ret void 530 ; CHECK-LABEL: @test18( 531 ; CHECK: call void @llvm.memcpy 532 ; CHECK: ret 533 } 534