; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; For @test11_pattern
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]

; For @test13_pattern
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]

target triple = "x86_64-apple-darwin10.0.0"

; Simplest case: a loop storing a zero byte to Base[0..Size) is expected to be
; replaced by a single memset of %Size bytes, leaving no store behind.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test1(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; Make sure memset is formed for larger than 1 byte stores, and that the
; alignment of the store is preserved.  The byte count is the trip count
; scaled by the element size (shl by 1 for i16).
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
  store i16 0, i16* %I.0.014, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test1_i16(
; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.  Here the store lives in the header (%for.body) and the
; exit test lives in the latch (%for.body.cont).
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test1a(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}


; Multi-byte splat value: 16843009 is 0x01010101, i.e. every byte of the stored
; i32 is 1, so the loop is expected to become a memset of byte value 1 over
; Size * 4 bytes.  The zero-trip guard in %entry must be kept.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK-LABEL: @test2(
; CHECK: br i1 %cmp10,
; CHECK: %0 = shl i64 %Size, 2
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %0, i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, we can't
; promote the memset.
; %MayAlias is an unrelated pointer argument, so the i8 store through it may
; overlap the region written through %Base on any iteration.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4

  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test3(
; CHECK-NOT: memset
; CHECK: ret void
}

; Make sure the first store in the loop is turned into a memset.
; The loop writes Base[0..100); the second store targets Base + 1000, which is
; outside that range (note that the value name %Base100 does not match the
; offset actually used).
define void @test4(i8* %Base) nounwind ssp {
bb.nph:                                           ; entry block
  %Base100 = getelementptr i8, i8* %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1

  ;; Store beyond the range memset, should be safe to promote.
  store i8 42, i8* %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test4(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 100, i1 false)
}

; This can't be promoted: the memset is a store of a loop variant value.
; The stored value %V is the truncated induction variable, so it differs on
; every iteration.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar

  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test5(
; CHECK-NOT: memset
; CHECK: ret void
}


;; memcpy formation
;; Byte-wise copy between two distinct 10000-byte allocas; expected to become a
;; single memcpy of %Size bytes.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test6(
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

;; memcpy formation, check alignment
;; The load is align 1 and the store is align 4; the memcpy must carry the
;; store's alignment on the destination and the load's on the source.
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 1
  store i32 %V, i32* %DestI, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test6_dest_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

;; memcpy formation, check alignment
;; Mirror image of the previous test: the load is align 4 and the store is
;; align 1.  The Dst and Src pattern variables are captured afresh below rather
;; than reused from the previous function, so this function's patterns do not
;; depend on what an earlier function happened to match.
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 4
  store i32 %V, i32* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test6_src_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}


; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
; Unlike @test1a, the store here lives in the latch block (%for.body.cont)
; rather than in the loop header.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test7(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration (the exit
; condition compares the incremented induction variable against 1), so the
; original store must survive.
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64, i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test8(
; CHECK: store i64 0, i64* %PI
}

; Opaque external function; @test9 uses its result as a pointer that may alias
; the argument passed in.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
;; %BaseAlias is returned by an opaque call that was handed %Base, so the store
;; through it may modify the buffer being read.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test9(
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}

; Two dimensional nested loop should be promoted to one big memset.
; The store index is i * 100 + j with both i and j running over [0, 100), so
; the nest covers X[0..10000) and the memset is expected in the entry block.
define void @test10(i8* %X) nounwind ssp {
entry:
  br label %bb.nph

bb.nph:                                           ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:                                        ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:                                        ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:                                        ; preds = %for.inc10
  ret void
; CHECK-LABEL: @test10(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call.
; rdar://9009151
; The stored i32 value 1 is not a byte splat, so a plain memset cannot express
; it; the expected pattern constant is the @.memset_pattern global checked at
; the top of this file.
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
  store i32 1, i32* %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; Store of null should turn into memset of zero.
; 10000 pointers of 8 bytes each (p:64:64:64 in the datalayout) give the
; expected length of 80000 bytes.
define void @test12(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  store i32* null, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 %P1, i8 0, i64 80000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call.
; rdar://9009151
; The value stored on every iteration is the address of the global @G; the
; expected pattern constant is the @.memset_pattern.1 global checked at the top
; of this file.
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  store i32* @G, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}



; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

; Each iteration loads g_50[i + 4] and stores it to g_50[i + 5], so the element
; written by one iteration is the element read by the next.  The loop body must
; be left as an explicit load/store pair.
define i32 @test14() nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %tmp5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
  %tmp2 = load i32, i32* %arrayidx, align 4
  %add4 = add nsw i32 %tmp5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %tmp2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %tmp5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %tmp8
; CHECK-LABEL: @test14(
; CHECK: for.body:
; CHECK: load i32
; CHECK: store i32
; CHECK: br i1 %cmp

}

define void @PR14241(i32* %s, i64 %size) {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; CHECK-LABEL: @PR14241(

entry:
  %end.idx = add i64 %size, -1
  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
  br label %while.body
; CHECK-NOT: memcpy
;
; FIXME: When we regain the ability to form a memmove here, this test should be
; reversed and turned into a positive assertion.
; CHECK-NOT: memmove

; The loop shifts elements down by one: it reads phi.ptr[1] and writes
; phi.ptr[0], so the source and destination ranges overlap.
while.body:
  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
  %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %val = load i32, i32* %src.ptr, align 4
; CHECK: load
  %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
  store i32 %val, i32* %dst.ptr, align 4
; CHECK: store
  %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %cmp = icmp eq i32* %next.ptr, %end.ptr
  br i1 %cmp, label %exit, label %while.body

exit:
  ret void
; CHECK: ret void
}

; Recognize loops with a negative stride.
; The induction variable counts down from 65536 to 0 inclusive, i.e. 65537
; stores of 4 bytes each, giving the expected length of 262148 bytes.
define void @test15(i32* nocapture %f) {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test15(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %f1, i8 0, i64 262148, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Loop with a negative stride.  Verify an aliasing write to f[65536] prevents
; the creation of a memset.
; %arrayidx1 is f[65536], which is also the first element written by the
; counting-down store, so the second store overlaps the would-be memset range.
define void @test16(i32* nocapture %f) {
entry:
  %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  store i32 1, i32* %arrayidx1, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test16(
; CHECK-NOT: call void @llvm.memset.p0i8.i64
; CHECK: ret void
}

; Handle memcpy-able loops with negative stride.
; Copies %c i32 elements from %a into a freshly malloc'ed buffer, walking the
; index from %c - 1 down to 0; the copy loop is skipped when %c is 0.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
  %conv = sext i32 %c to i64
  %mul = shl nsw i64 %conv, 2
  %call = tail call noalias i8* @malloc(i64 %mul)
  %0 = bitcast i8* %call to i32*
  %tobool.9 = icmp eq i32 %c, 0
  br i1 %tobool.9, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
  %dec10 = add nsw i32 %dec10.in, -1
  %idxprom = sext i32 %dec10 to i64
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
  store i32 %1, i32* %arrayidx2, align 4
  %tobool = icmp eq i32 %dec10, 0
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ret i32* %0
; CHECK-LABEL: @test17(
; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}

declare noalias i8* @malloc(i64)

; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
;   for (int i = 2047; i >= 0; --i) {
;     a[i] = b[i];
;   }
; }
; NOTE(review): the function references attribute group #0, but no definition
; of that group is visible in this file - confirm this is intentional.
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test18(
; CHECK: call void @llvm.memcpy
; CHECK: ret
}

; Two dimensional nested loop with negative stride should be promoted to one big memset.
; Both i and j count down from 99 to 0 and the store index is i * 100 + j, so
; the nest covers X[0..10000); the memset is expected in the entry block.
define void @test19(i8* nocapture %X) {
entry:
  br label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %entry, %for.inc4
  %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
  %mul = mul nsw i32 %i.06, 100
  br label %for.body3

for.body3:                                        ; preds = %for.cond1.preheader, %for.body3
  %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
  %add = add nsw i32 %j.05, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %dec = add nsw i32 %j.05, -1
  %cmp2 = icmp sgt i32 %j.05, 0
  br i1 %cmp2, label %for.body3, label %for.inc4

for.inc4:                                         ; preds = %for.body3
  %dec5 = add nsw i32 %i.06, -1
  %cmp = icmp sgt i32 %i.06, 0
  br i1 %cmp, label %for.cond1.preheader, label %for.end6

for.end6:                                         ; preds = %for.inc4
  ret void
; CHECK-LABEL: @test19(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK: ret void
}

; Handle loops where the trip count is a narrow integer that needs to be
; extended.
; The trip count %size is an i32 while the memset length is an i64: the
; expected preheader code zero-extends %size and scales it by 8 (shl by 3) for
; the i64 element size.
define void @form_memset_narrow_size(i64* %ptr, i32 %size) {
; CHECK-LABEL: @form_memset_narrow_size(
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK:       entry:
; CHECK:         %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT:    br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:
  br label %loop.body
; CHECK:       loop.ph:
; CHECK-NEXT:    %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT:    %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 %{{.*}}, i8 0, i64 %[[SCALED_SIZE]], i1 false)

loop.body:
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom = sext i32 %storemerge4 to i64
  %arrayidx = getelementptr inbounds i64, i64* %ptr, i64 %idxprom
  store i64 0, i64* %arrayidx, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Same narrow (i32) trip count as the function above, but for a load/store copy
; between two noalias i64 buffers, which is expected to become a memcpy.
define void @form_memcpy_narrow_size(i64* noalias %dst, i64* noalias %src, i32 %size) {
; CHECK-LABEL: @form_memcpy_narrow_size(
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK:       entry:
; CHECK:         %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT:    br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:
  br label %loop.body
; CHECK:       loop.ph:
; CHECK-NEXT:    %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT:    %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 %[[SCALED_SIZE]], i1 false)

loop.body:
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom1 = sext i32 %storemerge4 to i64
  %arrayidx1 = getelementptr inbounds i64, i64* %src, i64 %idxprom1
  %v = load i64, i64* %arrayidx1, align 8
  %idxprom2 = sext i32 %storemerge4 to i64
  %arrayidx2 = getelementptr inbounds i64, i64* %dst, i64 %idxprom2
  store i64 %v, i64* %arrayidx2, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Validate that "memset_pattern" has the proper attributes.
; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
; CHECK: [[ATTRS]] = { argmemonly }