Home | History | Annotate | Download | only in LoopIdiom
      1 ; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
      2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
      3 target triple = "x86_64-apple-darwin10.0.0"   ; darwin10 target: the *_pattern tests below depend on it to get memset_pattern calls
      4 
      5 define void @test1(i8* %Base, i64 %Size) nounwind ssp {   ; baseline: one "store i8 0" per iteration at %Base + %indvar
      6 bb.nph:                                           ; entry block (this function has no %entry)
      7   br label %for.body
      8 
      9 for.body:                                         ; preds = %bb.nph, %for.body
     10   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
     11   %I.0.014 = getelementptr i8* %Base, i64 %indvar
     12   store i8 0, i8* %I.0.014, align 1               ; the store that is expected to become a memset
     13   %indvar.next = add i64 %indvar, 1
     14   %exitcond = icmp eq i64 %indvar.next, %Size     ; leave once the incremented index equals %Size
     15   br i1 %exitcond, label %for.end, label %for.body
     16 
     17 for.end:                                          ; preds = %for.body
     18   ret void
     19 ; CHECK: @test1
     20 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
     21 ; CHECK-NOT: store
     22 }
     23 
     24 ; This is a loop that was rotated but where the blocks weren't merged: the exit
     25 ; test sits in its own block (%for.body.cont).  This shouldn't perturb us.
     26 define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
     27 bb.nph:                                           ; entry block (this function has no %entry)
     28   br label %for.body
     29 
     30 for.body:                                         ; preds = %bb.nph, %for.body.cont
     31   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
     32   %I.0.014 = getelementptr i8* %Base, i64 %indvar
     33   store i8 0, i8* %I.0.014, align 1               ; store and increment stay in the loop header
     34   %indvar.next = add i64 %indvar, 1
     35   br label %for.body.cont
     36 for.body.cont:                                    ; preds = %for.body
     37   %exitcond = icmp eq i64 %indvar.next, %Size
     38   br i1 %exitcond, label %for.end, label %for.body
     39 
     40 for.end:                                          ; preds = %for.body.cont
     41   ret void
     42 ; CHECK: @test1a
     43 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
     44 ; CHECK-NOT: store
     45 }
     46 
     47 
     48 define void @test2(i32* %Base, i64 %Size) nounwind ssp {   ; i32 stores whose four bytes are all equal
     49 entry:
     50   %cmp10 = icmp eq i64 %Size, 0                   ; zero-trip guard: skip the loop when %Size is 0
     51   br i1 %cmp10, label %for.end, label %for.body
     52 
     53 for.body:                                         ; preds = %entry, %for.body
     54   %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
     55   %add.ptr.i = getelementptr i32* %Base, i64 %i.011
     56   store i32 16843009, i32* %add.ptr.i, align 4    ; 16843009 == 0x01010101, i.e. the byte 1 repeated four times
     57   %inc = add nsw i64 %i.011, 1
     58   %exitcond = icmp eq i64 %inc, %Size
     59   br i1 %exitcond, label %for.end, label %for.body
     60 
     61 for.end:                                          ; preds = %for.body, %entry
     62   ret void
     63 ; CHECK: @test2
     64 ; CHECK: br i1 %cmp10,
     65 ; CHECK: %tmp = mul i64 %Size, 4
     66 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %tmp, i32 4, i1 false)
     67 ; CHECK-NOT: store
     68 }
     69 
     70 ; This is a case where there is an extra may-aliased store in the loop, so we
     71 ; can't promote the i32 stores to a memset.
     72 define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
     73 entry:
     74   br label %for.body
     75 
     76 for.body:                                         ; preds = %entry, %for.body
     77   %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
     78   %add.ptr.i = getelementptr i32* %Base, i64 %i.011
     79   store i32 16843009, i32* %add.ptr.i, align 4    ; same splat store as in @test2
     80   
     81   store i8 42, i8* %MayAlias                      ; %MayAlias is an unrelated argument, so it may point into the %Base range
     82   %inc = add nsw i64 %i.011, 1
     83   %exitcond = icmp eq i64 %inc, %Size
     84   br i1 %exitcond, label %for.end, label %for.body
     85 
     86 for.end:                                          ; preds = %for.body
     87   ret void
     88 ; CHECK: @test3
     89 ; CHECK-NOT: memset
     90 ; CHECK: ret void
     91 }
     92 
     93 
     94 ;; TODO: We should be able to promote this memset.  Not yet though, so the expectations below carry a -TODO suffix (presumably so FileCheck skips them).
     95 define void @test4(i8* %Base) nounwind ssp {
     96 bb.nph:                                           ; entry block (this function has no %entry)
     97   %Base100 = getelementptr i8* %Base, i64 1000    ; NOTE: despite the name, this is %Base + 1000
     98   br label %for.body
     99 
    100 for.body:                                         ; preds = %bb.nph, %for.body
    101   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    102   %I.0.014 = getelementptr i8* %Base, i64 %indvar
    103   store i8 0, i8* %I.0.014, align 1
    104   
    105   ;; This store lands beyond the range the memset would cover (offset 1000 vs. offsets 0..99), so promotion should be safe.
    106   store i8 42, i8* %Base100
    107   
    108   %indvar.next = add i64 %indvar, 1
    109   %exitcond = icmp eq i64 %indvar.next, 100       ; constant trip count of 100
    110   br i1 %exitcond, label %for.end, label %for.body
    111 
    112 for.end:                                          ; preds = %for.body
    113   ret void
    114 ; CHECK-TODO: @test4
    115 ; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
    116 ; CHECK-TODO-NOT: store
    117 }
    118 
    119 ; This can't be promoted: the stored value is loop variant, so no single memset byte describes it.
    120 define void @test5(i8* %Base, i64 %Size) nounwind ssp {
    121 bb.nph:                                           ; entry block (this function has no %entry)
    122   br label %for.body
    123 
    124 for.body:                                         ; preds = %bb.nph, %for.body
    125   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    126   %I.0.014 = getelementptr i8* %Base, i64 %indvar
    127   
    128   %V = trunc i64 %indvar to i8                    ; low byte of the induction variable: changes every iteration
    129   store i8 %V, i8* %I.0.014, align 1
    130   %indvar.next = add i64 %indvar, 1
    131   %exitcond = icmp eq i64 %indvar.next, %Size
    132   br i1 %exitcond, label %for.end, label %for.body
    133 
    134 for.end:                                          ; preds = %for.body
    135   ret void
    136 ; CHECK: @test5
    137 ; CHECK-NOT: memset
    138 ; CHECK: ret void
    139 }
    140 
    141 
    142 ;; memcpy formation: a byte-by-byte copy loop between two separate allocas.
    143 define void @test6(i64 %Size) nounwind ssp {
    144 bb.nph:
    145   %Base = alloca i8, i32 10000                    ; copy source
    146   %Dest = alloca i8, i32 10000                    ; copy destination, a distinct stack object
    147   br label %for.body
    148 
    149 for.body:                                         ; preds = %bb.nph, %for.body
    150   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    151   %I.0.014 = getelementptr i8* %Base, i64 %indvar
    152   %DestI = getelementptr i8* %Dest, i64 %indvar
    153   %V = load i8* %I.0.014, align 1                 ; load and store use the same index
    154   store i8 %V, i8* %DestI, align 1
    155   %indvar.next = add i64 %indvar, 1
    156   %exitcond = icmp eq i64 %indvar.next, %Size
    157   br i1 %exitcond, label %for.end, label %for.body
    158 
    159 for.end:                                          ; preds = %for.body
    160   ret void
    161 ; CHECK: @test6
    162 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
    163 ; CHECK-NOT: store
    164 ; CHECK: ret void
    165 }
    166 
    167 
    168 ; This is a loop that was rotated but where the blocks weren't merged: here the
    169 ; header holds only the phi, and the store lives in %for.body.cont.  This shouldn't perturb us.
    170 define void @test7(i8* %Base, i64 %Size) nounwind ssp {
    171 bb.nph:                                           ; entry block (this function has no %entry)
    172   br label %for.body
    173 
    174 for.body:                                         ; preds = %bb.nph, %for.body.cont
    175   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
    176   br label %for.body.cont
    177 for.body.cont:                                    ; preds = %for.body
    178   %I.0.014 = getelementptr i8* %Base, i64 %indvar
    179   store i8 0, i8* %I.0.014, align 1               ; unlike @test1a, the store is in the second block
    180   %indvar.next = add i64 %indvar, 1
    181   %exitcond = icmp eq i64 %indvar.next, %Size
    182   br i1 %exitcond, label %for.end, label %for.body
    183 
    184 for.end:                                          ; preds = %for.body.cont
    185   ret void
    186 ; CHECK: @test7
    187 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
    188 ; CHECK-NOT: store
    189 }
    190 
    191 ; This loop should not be transformed: it executes only one iteration.
    192 define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
    193 bb.nph:                                           ; entry block (this function has no %entry)
    194   br label %for.body
    195 
    196 for.body:                                         ; preds = %bb.nph, %for.body
    197   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    198   %PI = getelementptr i64* %Ptr, i64 %indvar
    199   store i64 0, i64 *%PI                           ; this plain store is expected to remain in the output
    200   %indvar.next = add i64 %indvar, 1
    201   %exitcond = icmp eq i64 %indvar.next, 1         ; true after the first pass through the body
    202   br i1 %exitcond, label %for.end, label %for.body
    203 
    204 for.end:                                          ; preds = %for.body
    205   ret void
    206 ; CHECK: @test8
    207 ; CHECK: store i64 0, i64* %PI
    208 }
    209 
    210 declare i8* @external(i8*)                        ; body not visible here; @test9 treats its result as a possible alias of its argument
    211 
    212 ;; This cannot be transformed into a memcpy, because the read-from location is
    213 ;; mutated by the loop.
    214 define void @test9(i64 %Size) nounwind ssp {
    215 bb.nph:
    216   %Base = alloca i8, i32 10000
    217   %Dest = alloca i8, i32 10000
    218   
    219   %BaseAlias = call i8* @external(i8* %Base)      ; %Base escapes through this call; the returned pointer may point into it
    220   br label %for.body
    221 
    222 for.body:                                         ; preds = %bb.nph, %for.body
    223   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    224   %I.0.014 = getelementptr i8* %Base, i64 %indvar
    225   %DestI = getelementptr i8* %Dest, i64 %indvar
    226   %V = load i8* %I.0.014, align 1
    227   store i8 %V, i8* %DestI, align 1
    228 
    229   ;; This store can clobber the input (the %Base buffer that the load above reads).
    230   store i8 4, i8* %BaseAlias
    231  
    232   %indvar.next = add i64 %indvar, 1
    233   %exitcond = icmp eq i64 %indvar.next, %Size
    234   br i1 %exitcond, label %for.end, label %for.body
    235 
    236 for.end:                                          ; preds = %for.body
    237   ret void
    238 ; CHECK: @test9
    239 ; CHECK-NOT: llvm.memcpy
    240 ; CHECK: ret void
    241 }
    242 
    243 ; A two-dimensional nested loop (100 x 100 bytes, index i*100 + j) should be promoted to one big memset.
    244 define void @test10(i8* %X) nounwind ssp {
    245 entry:
    246   br label %bb.nph
    247 
    248 bb.nph:                                           ; preds = %entry, %for.inc10
    249   %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]    ; outer index i
    250   br label %for.body5
    251 
    252 for.body5:                                        ; preds = %for.body5, %bb.nph
    253   %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]     ; inner index j
    254   %mul = mul nsw i32 %i.04, 100                   ; start of row i
    255   %add = add nsw i32 %j.02, %mul                  ; flat byte index i*100 + j
    256   %idxprom = sext i32 %add to i64
    257   %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
    258   store i8 0, i8* %arrayidx, align 1
    259   %inc = add nsw i32 %j.02, 1
    260   %cmp4 = icmp eq i32 %inc, 100                   ; inner loop: 100 iterations
    261   br i1 %cmp4, label %for.inc10, label %for.body5
    262 
    263 for.inc10:                                        ; preds = %for.body5
    264   %inc12 = add nsw i32 %i.04, 1
    265   %cmp = icmp eq i32 %inc12, 100                  ; outer loop: 100 iterations, 10000 bytes in total
    266   br i1 %cmp, label %for.end13, label %bb.nph
    267 
    268 for.end13:                                        ; preds = %for.inc10
    269   ret void
    270 ; CHECK: @test10
    271 ; CHECK: entry:
    272 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
    273 ; CHECK-NOT: store
    274 ; CHECK: ret void
    275 }
    276 
    277 ; On darwin10 (which is the triple in this .ll file) this loop can be turned
    278 ; into a memset_pattern call.
    279 ; rdar://9009151
    280 define void @test11_pattern(i32* nocapture %P) nounwind ssp {
    281 entry:
    282   br label %for.body
    283 
    284 for.body:                                         ; preds = %entry, %for.body
    285   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    286   %arrayidx = getelementptr i32* %P, i64 %indvar
    287   store i32 1, i32* %arrayidx, align 4            ; the bytes of i32 1 are not all equal, so a plain memset cannot express it
    288   %indvar.next = add i64 %indvar, 1
    289   %exitcond = icmp eq i64 %indvar.next, 10000     ; constant trip count of 10000
    290   br i1 %exitcond, label %for.end, label %for.body
    291 
    292 for.end:                                          ; preds = %for.body
    293   ret void
    294 ; CHECK: @test11_pattern
    295 ; CHECK-NEXT: entry:
    296 ; CHECK-NEXT: bitcast
    297 ; CHECK-NEXT: memset_pattern
    298 ; CHECK-NOT: store
    299 ; CHECK: ret void
    300 }
    301 
    302 ; Store of null should turn into memset of zero (a null pointer is stored as all-zero bytes).
    303 define void @test12(i32** nocapture %P) nounwind ssp {
    304 entry:
    305   br label %for.body
    306 
    307 for.body:                                         ; preds = %entry, %for.body
    308   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    309   %arrayidx = getelementptr i32** %P, i64 %indvar
    310   store i32* null, i32** %arrayidx, align 4
    311   %indvar.next = add i64 %indvar, 1
    312   %exitcond = icmp eq i64 %indvar.next, 10000     ; 10000 pointers x 8 bytes (p:64 in the datalayout) = the 80000 expected below
    313   br i1 %exitcond, label %for.end, label %for.body
    314 
    315 for.end:                                          ; preds = %for.body
    316   ret void
    317 ; CHECK: @test12
    318 ; CHECK-NEXT: entry:
    319 ; CHECK-NEXT: bitcast
    320 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
    321 ; CHECK-NOT: store
    322 ; CHECK: ret void
    323 }
    324 
    325 @G = global i32 5                                 ; only its address is used, as the value stored by @test13_pattern
    326 
    327 ; This store-of-address loop can be turned into a memset_pattern call.
    328 ; rdar://9009151
    329 define void @test13_pattern(i32** nocapture %P) nounwind ssp {
    330 entry:
    331   br label %for.body
    332 
    333 for.body:                                         ; preds = %entry, %for.body
    334   %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    335   %arrayidx = getelementptr i32** %P, i64 %indvar
    336   store i32* @G, i32** %arrayidx, align 4         ; stored value is the loop-invariant address of @G
    337   %indvar.next = add i64 %indvar, 1
    338   %exitcond = icmp eq i64 %indvar.next, 10000     ; constant trip count of 10000
    339   br i1 %exitcond, label %for.end, label %for.body
    340 
    341 for.end:                                          ; preds = %for.body
    342   ret void
    343 ; CHECK: @test13_pattern
    344 ; CHECK-NEXT: entry:
    345 ; CHECK-NEXT: bitcast
    346 ; CHECK-NEXT: memset_pattern
    347 ; CHECK-NOT: store
    348 ; CHECK: ret void
    349 }
    350 
    351 
    352 
    353 ; PR9815 - This is a partial overlap case that cannot be safely transformed
    354 ; into a memcpy: the second iteration loads the element the first iteration just stored.
    355 @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
    356 
    357 define i32 @test14() nounwind {
    358 entry:
    359   br label %for.body
    360 
    361 for.body:                                         ; preds = %entry, %for.body
    362   %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
    363   %add = add nsw i32 %tmp5, 4
    364   %idxprom = sext i32 %add to i64
    365   %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
    366   %tmp2 = load i32* %arrayidx, align 4            ; reads g_50[i + 4]
    367   %add4 = add nsw i32 %tmp5, 5
    368   %idxprom5 = sext i32 %add4 to i64
    369   %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
    370   store i32 %tmp2, i32* %arrayidx6, align 4       ; writes g_50[i + 5], which the next iteration loads
    371   %inc = add nsw i32 %tmp5, 1
    372   %cmp = icmp slt i32 %inc, 2                     ; two iterations: i = 0 and i = 1
    373   br i1 %cmp, label %for.body, label %for.end
    374 
    375 for.end:                                          ; preds = %for.body
    376   %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
    377   ret i32 %tmp8
    378 ; CHECK: @test14
    379 ; CHECK: for.body:
    380 ; CHECK: load i32
    381 ; CHECK: store i32
    382 ; CHECK: br i1 %cmp
    383 
    384 }
    385 
    386 
    387