Home | History | Annotate | Download | only in LICM
      1 ; RUN: opt < %s -licm -S | FileCheck %s
      2 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
      3 
      4 @X = global i32 0		; i32 global; loaded ahead of the loop in @test1, @test2 and @test3
      5 
      6 declare void @foo()		; external function with no attributes; called in the loops of @test1 and @test5
      7 
      8 declare i32 @llvm.bitreverse.i32(i32)		; intrinsic called in the loop of @hoist_bitreverse
      9 
     10 ; Regression test: LICM must not hoist a potentially trapping instruction
     11 ; out of a loop when that instruction is not guaranteed to execute.
     12 define i32 @test1(i1 %c) {
     13 ; CHECK-LABEL: @test1(
     14 	%A = load i32, i32* @X		; loop-invariant divisor of the sdiv in IfUnEqual
     15 	br label %Loop
     16 Loop:		; preds = %LoopTail, %0
     17 	call void @foo( )
     18 	br i1 %c, label %LoopTail, label %IfUnEqual		; IfUnEqual is only entered when %c is false
     19         
     20 IfUnEqual:		; preds = %Loop
     21 ; CHECK: IfUnEqual:
     22 ; CHECK-NEXT: sdiv i32 4, %A
     23 	%B1 = sdiv i32 4, %A		; the potentially trapping instruction; expected to remain in this conditional block
     24 	br label %LoopTail
     25         
     26 LoopTail:		; preds = %IfUnEqual, %Loop
     27 	%B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]		; 0 when IfUnEqual was skipped, otherwise the quotient
     28 	br i1 %c, label %Loop, label %Out		; loop again while %c is true
     29 Out:		; preds = %LoopTail
     30 	%C = sub i32 %A, %B		; result is %A minus the last phi value
     31 	ret i32 %C
     32 }
     33 
     34 
     35 declare void @foo2(i32) nounwind		; nounwind external function called from the loops of @test2 and @test3
     36 
     37 
     38 ;; The sdiv is the first instruction of the loop header, so it runs on every trip through the loop; it is ok and desirable to hoist this potentially trapping instruction.
     39 define i32 @test2(i1 %c) {
     40 ; CHECK-LABEL: @test2(
     41 ; CHECK-NEXT: load i32, i32* @X
     42 ; CHECK-NEXT: %B = sdiv i32 4, %A
     43   %A = load i32, i32* @X             ; divisor, loaded once before the loop
     44   br label %Loop
     45 
     46 Loop:
     47   ;; Should be hoisted: the expected output has this div directly after the load of @X.
     48   %B = sdiv i32 4, %A
     49   br label %loop2
     50 
     51 loop2:
     52   call void @foo2( i32 %B )          ; in-loop use of the quotient
     53   br i1 %c, label %Loop, label %Out  ; loop again while %c is true
     54 
     55 Out:
     56   %C = sub i32 %A, %B                ; the quotient is also used after the loop
     57   ret i32 %C
     58 }
     59 
     60 
     61 ; The loop-invariant add of two constants should be constant folded (to 6), not hoisted.
     62 define i32 @test3(i1 %c) {
     63 ; CHECK-LABEL: define i32 @test3(
     64 ; CHECK: call void @foo2(i32 6)
     65   %x.val = load i32, i32* @X
     66   br label %loop
     67 loop:                                             ; preds = %loop, %0
     68   %six = add i32 4, 2                             ; both operands are constants
     69   call void @foo2(i32 %six)
     70   br i1 %c, label %loop, label %exit              ; loop again while %c is true
     71 exit:                                             ; preds = %loop
     72   %diff = sub i32 %x.val, %six
     73   ret i32 %diff
     74 }
     75 
     76 ; CHECK-LABEL: @test4(
     77 ; CHECK: call
     78 ; CHECK: sdiv
     79 ; CHECK: ret
     80 define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
     81 entry:
     82   br label %body
     83 ; The division is expected to stay below the call to the opaque external callee (named as one that may call exit).
     84 body:                                             ; preds = %body, %entry
     85   %iv = phi i32 [ 0, %entry ], [ %iv.next, %body ]
     86   %acc = phi i32 [ 0, %entry ], [ %acc.next, %body ]
     87   call void @foo_may_call_exit(i32 0)
     88   %quot = sdiv i32 %x, %y                         ; operands are loop-invariant function arguments
     89   %acc.next = add nsw i32 %acc, %quot
     90   %iv.next = add nsw i32 %iv, 1
     91   %again = icmp slt i32 %iv.next, 10000
     92   br i1 %again, label %body, label %done
     93 
     94 done:                                             ; preds = %body
     95   %total = phi i32 [ %acc.next, %body ]
     96   ret i32 %total
     97 }
     98 
     99 declare void @foo_may_call_exit(i32)
    100 
    101 ; PR14854: expects the loop-invariant extractvalue in front of the loop and the insertvalue after the ifend label.
    102 ; CHECK-LABEL: @test5(
    103 ; CHECK: extractvalue
    104 ; CHECK: br label %tailrecurse
    105 ; CHECK: tailrecurse:
    106 ; CHECK: ifend:
    107 ; CHECK: insertvalue
    108 define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
    109 entry:
    110   br label %tailrecurse
    111 
    112 tailrecurse:                                      ; preds = %then, %entry
    113   %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
    114   %out = extractvalue { i32*, i32 } %e, 1         ; field 1 of the argument %e; feeds the exit test below
    115   %d = insertvalue { i32*, i32 } %e, i32* null, 0 ; %e with field 0 replaced by null; only used by the ret in ifend
    116   %cmp1 = icmp sgt i32 %out, %i.tr
    117   br i1 %cmp1, label %then, label %ifend          ; keep looping while %out > %i.tr
    118 
    119 then:                                             ; preds = %tailrecurse
    120   call void @foo()
    121   %cmp2 = add i32 %i.tr, 1                        ; despite its name, this is the incremented counter
    122   br label %tailrecurse
    123 
    124 ifend:                                            ; preds = %tailrecurse
    125   ret { i32*, i32 } %d
    126 }
    127 ; bitreverse is only called in the conditionally executed %body block; the expected output has the call hoisted directly in front of the entry block's branch to %header.
    128 ; CHECK-LABEL: @hoist_bitreverse(
    129 ; CHECK: bitreverse
    130 ; CHECK-NEXT: br label %header
    131 define i32 @hoist_bitreverse(i32)  {
    132   br label %header
    133 
    134 header:
    135   %sum = phi i32 [ 0, %1 ], [ %5, %latch ]        ; running total of the bit-reversed argument
    136   %2 = phi i32 [ 0, %1 ], [ %6, %latch ]          ; loop counter
    137   %3 = icmp slt i32 %2, 1024
    138   br i1 %3, label %body, label %return            ; leave the loop once the counter reaches 1024
    139 
    140 body:
    141   %4 = call i32 @llvm.bitreverse.i32(i32 %0)      ; loop-invariant: its only operand is the function argument %0
    142   %5 = add i32 %sum, %4
    143   br label %latch
    144 
    145 latch:
    146   %6 = add nsw i32 %2, 1
    147   br label %header
    148 
    149 return:
    150   ret i32 %sum
    151 }
    152 
    153 declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly		; called by every @test_fence* function in this file
    154 declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind		; only called in @test_fence1
    155 declare void @escaping.invariant.start({}*) nounwind		; external function that receives the invariant.start result in @test_fence2
    156 ; invariant.start dominates the load and no invariant.end follows, so within
    157 ; this scope the load is invariant and `addrld` can be hoisted out of the loop.
    158 define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
    159 ; CHECK-LABEL: @test_fence(
    160 ; CHECK: entry:
    161 ; CHECK: invariant.start
    162 ; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
    163 ; CHECK: br label %loop
    164 entry:
    165   %gep = getelementptr inbounds i8, i8* %addr, i64 8
    166   %addr.i = bitcast i8* %gep to i32 *
    167   store atomic i32 5, i32 * %addr.i unordered, align 8              ; last write before the location is marked invariant
    168   fence release
    169   %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)     ; covers 4 bytes at %gep; the result is never passed to invariant.end
    170   br label %loop
    171 
    172 loop:
    173   %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
    174   %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
    175   %volload = load atomic i8, i8* %volatile unordered, align 8
    176   fence acquire
    177   %volchk = icmp eq i8 %volload, 0
    178   %addrld = load atomic i32, i32* %addr.i unordered, align 8        ; the load expected to be hoisted into entry
    179   %sel = select i1 %volchk, i32 0, i32 %addrld
    180   %sum.next = add i32 %sel, %sum
    181   %indvar.next = add i32 %indvar, 1
    182   %cond = icmp slt i32 %indvar.next, %n
    183   br i1 %cond, label %loop, label %loopexit
    184 
    185 loopexit:
    186   ret i32 %sum
    187 }
    188 
    189 
    190 
    191 ; Same as @test_fence, but the load is no longer invariant because the region is
    192 ; closed by invariant.end before the loop. We cannot hoist the addrld out of the loop.
    193 define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
    194 ; CHECK-LABEL: @test_fence1(
    195 ; CHECK: entry:
    196 ; CHECK: invariant.start
    197 ; CHECK-NEXT: invariant.end
    198 ; CHECK-NEXT: br label %loop
    199 entry:
    200   %gep = getelementptr inbounds i8, i8* %addr, i64 8
    201   %addr.i = bitcast i8* %gep to i32 *
    202   store atomic i32 5, i32 * %addr.i unordered, align 8
    203   fence release
    204   %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
    205   call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)   ; ends the invariant region before the loop is entered
    206   br label %loop
    207 
    208 loop:
    209   %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
    210   %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
    211   %volload = load atomic i8, i8* %volatile unordered, align 8
    212   fence acquire
    213   %volchk = icmp eq i8 %volload, 0
    214   %addrld = load atomic i32, i32* %addr.i unordered, align 8        ; expected to stay in the loop
    215   %sel = select i1 %volchk, i32 0, i32 %addrld
    216   %sum.next = add i32 %sel, %sum
    217   %indvar.next = add i32 %indvar, 1
    218   %cond = icmp slt i32 %indvar.next, %n
    219   br i1 %cond, label %loop, label %loopexit
    220 
    221 loopexit:
    222   ret i32 %sum
    223 }
    224 
    225 ; Same as @test_fence1, but instead of invariant.end, the result of
    226 ; invariant.start escapes through a call. We cannot hoist the load.
    227 define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
    228 ; CHECK-LABEL: @test_fence2(
    229 ; CHECK: entry:
    230 ; CHECK-NOT: load
    231 ; CHECK: br label %loop
    232 entry:
    233   %gep = getelementptr inbounds i8, i8* %addr, i64 8
    234   %addr.i = bitcast i8* %gep to i32 *
    235   store atomic i32 5, i32 * %addr.i unordered, align 8
    236   fence release
    237   %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
    238   call void @escaping.invariant.start({}* %invst)                   ; the invariant.start result is handed to an external function
    239   br label %loop
    240 
    241 loop:
    242   %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
    243   %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
    244   %volload = load atomic i8, i8* %volatile unordered, align 8
    245   fence acquire
    246   %volchk = icmp eq i8 %volload, 0
    247   %addrld = load atomic i32, i32* %addr.i unordered, align 8        ; expected to stay in the loop
    248   %sel = select i1 %volchk, i32 0, i32 %addrld
    249   %sum.next = add i32 %sel, %sum
    250   %indvar.next = add i32 %indvar, 1
    251   %cond = icmp slt i32 %indvar.next, %n
    252   br i1 %cond, label %loop, label %loopexit
    253 
    254 loopexit:
    255   ret i32 %sum
    256 }
    257 
    258 ; FIXME: invariant.start dominates the load and, within this scope, the
    259 ; load is invariant, so the `addrld` load could be hoisted out of the loop.
    260 ; Here the load operand %addr.i is bitcast to i8* before being passed to
    261 ; invariant.start, and the expected output below still keeps the load in the loop.
    262 define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
    263 ; CHECK-LABEL: @test_fence3(
    264 ; CHECK: entry:
    265 ; CHECK: invariant.start
    266 ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
    267 ; CHECK: br label %loop
    268 entry:
    269   %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
    270   %gep = bitcast i32* %addr.i to i8 *                               ; i8* view of %addr.i, used only as the invariant.start operand
    271   store atomic i32 5, i32 * %addr.i unordered, align 8
    272   fence release
    273   %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
    274   br label %loop
    275 
    276 loop:
    277   %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
    278   %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
    279   %volload = load atomic i8, i8* %volatile unordered, align 8
    280   fence acquire
    281   %volchk = icmp eq i8 %volload, 0
    282   %addrld = load atomic i32, i32* %addr.i unordered, align 8        ; loads through %addr.i, not through the bitcast %gep
    283   %sel = select i1 %volchk, i32 0, i32 %addrld
    284   %sum.next = add i32 %sel, %sum
    285   %indvar.next = add i32 %indvar, 1
    286   %cond = icmp slt i32 %indvar.next, %n
    287   br i1 %cond, label %loop, label %loopexit
    288 
    289 loopexit:
    290   ret i32 %sum
    291 }
    292 
    293 ; The store and the invariant.start are inside the loop here; we should not hoist the addrld out of the loop.
    294 define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
    295 ; CHECK-LABEL: @test_fence4(
    296 ; CHECK: entry:
    297 ; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
    298 ; CHECK: br label %loop
    299 entry:
    300   %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
    301   %gep = bitcast i32* %addr.i to i8 *
    302   br label %loop
    303 
    304 loop:
    305   %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
    306   %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
    307   store atomic i32 5, i32 * %addr.i unordered, align 8              ; written on every iteration, before the invariant.start below
    308   fence release
    309   %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)     ; inside the loop, so it does not dominate the loop from entry
    310   %volload = load atomic i8, i8* %volatile unordered, align 8
    311   fence acquire
    312   %volchk = icmp eq i8 %volload, 0
    313   %addrld = load atomic i32, i32* %addr.i unordered, align 8        ; expected to stay in the loop
    314   %sel = select i1 %volchk, i32 0, i32 %addrld
    315   %sum.next = add i32 %sel, %sum
    316   %indvar.next = add i32 %indvar, 1
    317   %cond = icmp slt i32 %indvar.next, %n
    318   br i1 %cond, label %loop, label %loopexit
    319 
    320 loopexit:
    321   ret i32 %sum
    322 }
    323