Home | History | Annotate | Download | only in LoopIdiom
      1 ; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
      2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
      3 
      4 ; For @test11_pattern
      5 ; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]
      6 
      7 ; For @test13_pattern
      8 ; CHECK: @.memset_pattern1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]
      9 
     10 target triple = "x86_64-apple-darwin10.0.0"
     11 
     12 define void @test1(i8* %Base, i64 %Size) nounwind ssp {
     13 bb.nph:                                           ; preds = %entry
     14   br label %for.body
     15 
     16 for.body:                                         ; preds = %bb.nph, %for.body
     17   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
     18   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
     19   store i8 0, i8* %I.0.014, align 1
     20   %indvar.next = add i64 %indvar, 1
     21   %exitcond = icmp eq i64 %indvar.next, %Size
     22   br i1 %exitcond, label %for.end, label %for.body
     23 
     24 for.end:                                          ; preds = %for.body, %entry
     25   ret void
     26 ; CHECK-LABEL: @test1(
     27 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
     28 ; CHECK-NOT: store
     29 }
     30 
     31 ; This is a loop that was rotated but where the blocks weren't merged.  This
     32 ; shouldn't perturb us.
     33 define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
     34 bb.nph:                                           ; preds = %entry
     35   br label %for.body
     36 
     37 for.body:                                         ; preds = %bb.nph, %for.body
     38   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
     39   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
     40   store i8 0, i8* %I.0.014, align 1
     41   %indvar.next = add i64 %indvar, 1
     42   br label %for.body.cont
     43 for.body.cont:
     44   %exitcond = icmp eq i64 %indvar.next, %Size
     45   br i1 %exitcond, label %for.end, label %for.body
     46 
     47 for.end:                                          ; preds = %for.body, %entry
     48   ret void
     49 ; CHECK-LABEL: @test1a(
     50 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
     51 ; CHECK-NOT: store
     52 }
     53 
     54 
     55 define void @test2(i32* %Base, i64 %Size) nounwind ssp {
     56 entry:
     57   %cmp10 = icmp eq i64 %Size, 0
     58   br i1 %cmp10, label %for.end, label %for.body
     59 
     60 for.body:                                         ; preds = %entry, %for.body
     61   %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
     62   %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
     63   store i32 16843009, i32* %add.ptr.i, align 4
     64   %inc = add nsw i64 %i.011, 1
     65   %exitcond = icmp eq i64 %inc, %Size
     66   br i1 %exitcond, label %for.end, label %for.body
     67 
     68 for.end:                                          ; preds = %for.body, %entry
     69   ret void
     70 ; CHECK-LABEL: @test2(
     71 ; CHECK: br i1 %cmp10,
     72 ; CHECK: %0 = mul i64 %Size, 4
     73 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
     74 ; CHECK-NOT: store
     75 }
     76 
     77 ; This is a case where there is an extra may-aliased store in the loop, we can't
     78 ; promote the memset.
     79 define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
     80 entry:
     81   br label %for.body
     82 
     83 for.body:                                         ; preds = %entry, %for.body
     84   %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
     85   %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
     86   store i32 16843009, i32* %add.ptr.i, align 4
     87   
     88   store i8 42, i8* %MayAlias
     89   %inc = add nsw i64 %i.011, 1
     90   %exitcond = icmp eq i64 %inc, %Size
     91   br i1 %exitcond, label %for.end, label %for.body
     92 
     93 for.end:                                          ; preds = %entry
     94   ret void
     95 ; CHECK-LABEL: @test3(
     96 ; CHECK-NOT: memset
     97 ; CHECK: ret void
     98 }
     99 
    100 
    101 ;; TODO: We should be able to promote this memset.  Not yet though.
    102 define void @test4(i8* %Base) nounwind ssp {
    103 bb.nph:                                           ; preds = %entry
    104   %Base100 = getelementptr i8, i8* %Base, i64 1000
    105   br label %for.body
    106 
    107 for.body:                                         ; preds = %bb.nph, %for.body
    108   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    109   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    110   store i8 0, i8* %I.0.014, align 1
    111   
    112   ;; Store beyond the range memset, should be safe to promote.
    113   store i8 42, i8* %Base100
    114   
    115   %indvar.next = add i64 %indvar, 1
    116   %exitcond = icmp eq i64 %indvar.next, 100
    117   br i1 %exitcond, label %for.end, label %for.body
    118 
    119 for.end:                                          ; preds = %for.body, %entry
    120   ret void
    121 ; CHECK-TODO-LABEL: @test4(
    122 ; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
    123 ; CHECK-TODO-NOT: store
    124 }
    125 
    126 ; This can't be promoted: the memset is a store of a loop variant value.
    127 define void @test5(i8* %Base, i64 %Size) nounwind ssp {
    128 bb.nph:                                           ; preds = %entry
    129   br label %for.body
    130 
    131 for.body:                                         ; preds = %bb.nph, %for.body
    132   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    133   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    134   
    135   %V = trunc i64 %indvar to i8
    136   store i8 %V, i8* %I.0.014, align 1
    137   %indvar.next = add i64 %indvar, 1
    138   %exitcond = icmp eq i64 %indvar.next, %Size
    139   br i1 %exitcond, label %for.end, label %for.body
    140 
    141 for.end:                                          ; preds = %for.body, %entry
    142   ret void
    143 ; CHECK-LABEL: @test5(
    144 ; CHECK-NOT: memset
    145 ; CHECK: ret void
    146 }
    147 
    148 
    149 ;; memcpy formation
    150 define void @test6(i64 %Size) nounwind ssp {
    151 bb.nph:
    152   %Base = alloca i8, i32 10000
    153   %Dest = alloca i8, i32 10000
    154   br label %for.body
    155 
    156 for.body:                                         ; preds = %bb.nph, %for.body
    157   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    158   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    159   %DestI = getelementptr i8, i8* %Dest, i64 %indvar
    160   %V = load i8, i8* %I.0.014, align 1
    161   store i8 %V, i8* %DestI, align 1
    162   %indvar.next = add i64 %indvar, 1
    163   %exitcond = icmp eq i64 %indvar.next, %Size
    164   br i1 %exitcond, label %for.end, label %for.body
    165 
    166 for.end:                                          ; preds = %for.body, %entry
    167   ret void
    168 ; CHECK-LABEL: @test6(
    169 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
    170 ; CHECK-NOT: store
    171 ; CHECK: ret void
    172 }
    173 
    174 
    175 ; This is a loop that was rotated but where the blocks weren't merged.  This
    176 ; shouldn't perturb us.
    177 define void @test7(i8* %Base, i64 %Size) nounwind ssp {
    178 bb.nph:                                           ; preds = %entry
    179   br label %for.body
    180 
    181 for.body:                                         ; preds = %bb.nph, %for.body
    182   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
    183   br label %for.body.cont
    184 for.body.cont:
    185   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    186   store i8 0, i8* %I.0.014, align 1
    187   %indvar.next = add i64 %indvar, 1
    188   %exitcond = icmp eq i64 %indvar.next, %Size
    189   br i1 %exitcond, label %for.end, label %for.body
    190 
    191 for.end:                                          ; preds = %for.body, %entry
    192   ret void
    193 ; CHECK-LABEL: @test7(
    194 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
    195 ; CHECK-NOT: store
    196 }
    197 
    198 ; This is a loop should not be transformed, it only executes one iteration.
    199 define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
    200 bb.nph:                                           ; preds = %entry
    201   br label %for.body
    202 
    203 for.body:                                         ; preds = %bb.nph, %for.body
    204   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    205   %PI = getelementptr i64, i64* %Ptr, i64 %indvar
    206   store i64 0, i64 *%PI
    207   %indvar.next = add i64 %indvar, 1
    208   %exitcond = icmp eq i64 %indvar.next, 1
    209   br i1 %exitcond, label %for.end, label %for.body
    210 
    211 for.end:                                          ; preds = %for.body, %entry
    212   ret void
    213 ; CHECK-LABEL: @test8(
    214 ; CHECK: store i64 0, i64* %PI
    215 }
    216 
    217 declare i8* @external(i8*)
    218 
    219 ;; This cannot be transformed into a memcpy, because the read-from location is
    220 ;; mutated by the loop.
    221 define void @test9(i64 %Size) nounwind ssp {
    222 bb.nph:
    223   %Base = alloca i8, i32 10000
    224   %Dest = alloca i8, i32 10000
    225   
    226   %BaseAlias = call i8* @external(i8* %Base)
    227   br label %for.body
    228 
    229 for.body:                                         ; preds = %bb.nph, %for.body
    230   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    231   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    232   %DestI = getelementptr i8, i8* %Dest, i64 %indvar
    233   %V = load i8, i8* %I.0.014, align 1
    234   store i8 %V, i8* %DestI, align 1
    235 
    236   ;; This store can clobber the input.
    237   store i8 4, i8* %BaseAlias
    238  
    239   %indvar.next = add i64 %indvar, 1
    240   %exitcond = icmp eq i64 %indvar.next, %Size
    241   br i1 %exitcond, label %for.end, label %for.body
    242 
    243 for.end:                                          ; preds = %for.body, %entry
    244   ret void
    245 ; CHECK-LABEL: @test9(
    246 ; CHECK-NOT: llvm.memcpy
    247 ; CHECK: ret void
    248 }
    249 
    250 ; Two dimensional nested loop should be promoted to one big memset.
    251 define void @test10(i8* %X) nounwind ssp {
    252 entry:
    253   br label %bb.nph
    254 
    255 bb.nph:                                           ; preds = %entry, %for.inc10
    256   %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
    257   br label %for.body5
    258 
    259 for.body5:                                        ; preds = %for.body5, %bb.nph
    260   %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
    261   %mul = mul nsw i32 %i.04, 100
    262   %add = add nsw i32 %j.02, %mul
    263   %idxprom = sext i32 %add to i64
    264   %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
    265   store i8 0, i8* %arrayidx, align 1
    266   %inc = add nsw i32 %j.02, 1
    267   %cmp4 = icmp eq i32 %inc, 100
    268   br i1 %cmp4, label %for.inc10, label %for.body5
    269 
    270 for.inc10:                                        ; preds = %for.body5
    271   %inc12 = add nsw i32 %i.04, 1
    272   %cmp = icmp eq i32 %inc12, 100
    273   br i1 %cmp, label %for.end13, label %bb.nph
    274 
    275 for.end13:                                        ; preds = %for.inc10
    276   ret void
    277 ; CHECK-LABEL: @test10(
    278 ; CHECK: entry:
    279 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
    280 ; CHECK-NOT: store
    281 ; CHECK: ret void
    282 }
    283 
    284 ; On darwin10 (which is the triple in this .ll file) this loop can be turned
    285 ; into a memset_pattern call.
    286 ; rdar://9009151
    287 define void @test11_pattern(i32* nocapture %P) nounwind ssp {
    288 entry:
    289   br label %for.body
    290 
    291 for.body:                                         ; preds = %entry, %for.body
    292   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    293   %arrayidx = getelementptr i32, i32* %P, i64 %indvar
    294   store i32 1, i32* %arrayidx, align 4
    295   %indvar.next = add i64 %indvar, 1
    296   %exitcond = icmp eq i64 %indvar.next, 10000
    297   br i1 %exitcond, label %for.end, label %for.body
    298 
    299 for.end:                                          ; preds = %for.body
    300   ret void
    301 ; CHECK-LABEL: @test11_pattern(
    302 ; CHECK-NEXT: entry:
    303 ; CHECK-NEXT: bitcast
    304 ; CHECK-NEXT: memset_pattern
    305 ; CHECK-NOT: store
    306 ; CHECK: ret void
    307 }
    308 
    309 ; Store of null should turn into memset of zero.
    310 define void @test12(i32** nocapture %P) nounwind ssp {
    311 entry:
    312   br label %for.body
    313 
    314 for.body:                                         ; preds = %entry, %for.body
    315   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    316   %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
    317   store i32* null, i32** %arrayidx, align 4
    318   %indvar.next = add i64 %indvar, 1
    319   %exitcond = icmp eq i64 %indvar.next, 10000
    320   br i1 %exitcond, label %for.end, label %for.body
    321 
    322 for.end:                                          ; preds = %for.body
    323   ret void
    324 ; CHECK-LABEL: @test12(
    325 ; CHECK-NEXT: entry:
    326 ; CHECK-NEXT: bitcast
    327 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
    328 ; CHECK-NOT: store
    329 ; CHECK: ret void
    330 }
    331 
    332 @G = global i32 5
    333 
    334 ; This store-of-address loop can be turned into a memset_pattern call.
    335 ; rdar://9009151
    336 define void @test13_pattern(i32** nocapture %P) nounwind ssp {
    337 entry:
    338   br label %for.body
    339 
    340 for.body:                                         ; preds = %entry, %for.body
    341   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    342   %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
    343   store i32* @G, i32** %arrayidx, align 4
    344   %indvar.next = add i64 %indvar, 1
    345   %exitcond = icmp eq i64 %indvar.next, 10000
    346   br i1 %exitcond, label %for.end, label %for.body
    347 
    348 for.end:                                          ; preds = %for.body
    349   ret void
    350 ; CHECK-LABEL: @test13_pattern(
    351 ; CHECK-NEXT: entry:
    352 ; CHECK-NEXT: bitcast
    353 ; CHECK-NEXT: memset_pattern
    354 ; CHECK-NOT: store
    355 ; CHECK: ret void
    356 }
    357 
    358 
    359 
    360 ; PR9815 - This is a partial overlap case that cannot be safely transformed
    361 ; into a memcpy.
    362 @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
    363 
    364 define i32 @test14() nounwind {
    365 entry:
    366   br label %for.body
    367 
    368 for.body:                                         ; preds = %for.inc, %for.body.lr.ph
    369   %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
    370   %add = add nsw i32 %tmp5, 4
    371   %idxprom = sext i32 %add to i64
    372   %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
    373   %tmp2 = load i32, i32* %arrayidx, align 4
    374   %add4 = add nsw i32 %tmp5, 5
    375   %idxprom5 = sext i32 %add4 to i64
    376   %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
    377   store i32 %tmp2, i32* %arrayidx6, align 4
    378   %inc = add nsw i32 %tmp5, 1
    379   %cmp = icmp slt i32 %inc, 2
    380   br i1 %cmp, label %for.body, label %for.end
    381 
    382 for.end:                                          ; preds = %for.inc
    383   %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
    384   ret i32 %tmp8
    385 ; CHECK-LABEL: @test14(
    386 ; CHECK: for.body:
    387 ; CHECK: load i32
    388 ; CHECK: store i32
    389 ; CHECK: br i1 %cmp
    390 
    391 }
    392 
    393 define void @PR14241(i32* %s, i64 %size) {
    394 ; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
    395 ; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
    396 ; instead of a memmove. If we get the memmove transform back, this will catch
    397 ; regressions.
    398 ;
    399 ; CHECK-LABEL: @PR14241(
    400 
    401 entry:
    402   %end.idx = add i64 %size, -1
    403   %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
    404   br label %while.body
    405 ; CHECK-NOT: memcpy
    406 ;
    407 ; FIXME: When we regain the ability to form a memmove here, this test should be
    408 ; reversed and turned into a positive assertion.
    409 ; CHECK-NOT: memmove
    410 
    411 while.body:
    412   %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
    413   %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
    414   %val = load i32, i32* %src.ptr, align 4
    415 ; CHECK: load
    416   %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
    417   store i32 %val, i32* %dst.ptr, align 4
    418 ; CHECK: store
    419   %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
    420   %cmp = icmp eq i32* %next.ptr, %end.ptr
    421   br i1 %cmp, label %exit, label %while.body
    422 
    423 exit:
    424   ret void
    425 ; CHECK: ret void
    426 }
    427