; RUN: opt < %s -licm -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s

@X = global i32 0

declare void @foo()

declare i32 @llvm.bitreverse.i32(i32)

; LICM must not hoist a potentially trapping instruction out of a block that
; is not guaranteed to execute. The sdiv below lives in a conditionally
; executed block and is expected to stay there.
define i32 @test1(i1 %c) {
; CHECK-LABEL: @test1(
  %A = load i32, i32* @X
  br label %Loop

Loop:
  call void @foo()
  br i1 %c, label %LoopTail, label %IfUnEqual

IfUnEqual:
; CHECK: IfUnEqual:
; CHECK-NEXT: sdiv i32 4, %A
  %B1 = sdiv i32 4, %A
  br label %LoopTail

LoopTail:
  %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]
  br i1 %c, label %Loop, label %Out

Out:
  %C = sub i32 %A, %B
  ret i32 %C
}


declare void @foo2(i32) nounwind


;; Here hoisting the potentially trapping instruction is both legal and
;; desirable: the sdiv is expected right after the load, ahead of the loop.
define i32 @test2(i1 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: load i32, i32* @X
; CHECK-NEXT: %B = sdiv i32 4, %A
  %A = load i32, i32* @X
  br label %Loop

Loop:
  ;; This division should end up outside the loop.
  %B = sdiv i32 4, %A
  br label %loop2

loop2:
  call void @foo2(i32 %B)
  br i1 %c, label %Loop, label %Out

Out:
  %C = sub i32 %A, %B
  ret i32 %C
}


; This loop invariant instruction should be constant folded, not hoisted.
define i32 @test3(i1 %c) {
; CHECK-LABEL: define i32 @test3(
; CHECK: call void @foo2(i32 6)
  %A = load i32, i32* @X
  br label %Loop

Loop:
  ; Both operands are constants, so the call is expected to receive the
  ; folded value 6 directly.
  %B = add i32 4, 2
  call void @foo2(i32 %B)
  br i1 %c, label %Loop, label %Out

Out:
  %C = sub i32 %A, %B
  ret i32 %C
}

; The sdiv is expected to remain after the call inside the loop rather than
; being hoisted above it (the callee is named as one that may call exit).
; CHECK-LABEL: @test4(
; CHECK: call
; CHECK: sdiv
; CHECK: ret
define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
entry:
  br label %for.body

for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %div = sdiv i32 %x, %y
  %add = add nsw i32 %n.01, %div
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end:
  %n.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %n.0.lcssa
}

declare void @foo_may_call_exit(i32)

; PR14854
; The extractvalue is expected ahead of the branch into %tailrecurse, and the
; insertvalue is expected in the ifend block.
; CHECK-LABEL: @test5(
; CHECK: extractvalue
; CHECK: br label %tailrecurse
; CHECK: tailrecurse:
; CHECK: ifend:
; CHECK: insertvalue
define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
entry:
  br label %tailrecurse

tailrecurse:
  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  %out = extractvalue { i32*, i32 } %e, 1
  %d = insertvalue { i32*, i32 } %e, i32* null, 0
  %cmp1 = icmp sgt i32 %out, %i.tr
  br i1 %cmp1, label %then, label %ifend

then:
  call void @foo()
  %cmp2 = add i32 %i.tr, 1
  br label %tailrecurse

ifend:
  ret { i32*, i32 } %d
}

; The bitreverse intrinsic call is expected ahead of the branch into %header,
; i.e. outside the loop. The label below matches on the function name only so
; that it does not depend on how the unnamed argument is printed.
; CHECK-LABEL: @hoist_bitreverse(
; CHECK: bitreverse
; CHECK: br label %header
define i32 @hoist_bitreverse(i32) {
  br label %header

header:
  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
  %3 = icmp slt i32 %2, 1024
  br i1 %3, label %body, label %return

body:
  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
  %5 = add i32 %sum, %4
  br label %latch

latch:
  %6 = add nsw i32 %2, 1
  br label %header

return:
  ret i32 %sum
}

declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
declare void @escaping.invariant.start({}*) nounwind

; invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop:
; it is expected in the entry block, ahead of the branch into the loop.
define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence(
; CHECK: entry:
; CHECK: invariant.start
; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32*
  store atomic i32 5, i32* %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}



; Same as the test above, but the load is no longer invariant (presence of
; invariant.end). We cannot hoist the addrld out of the loop.
define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
; The entry block is expected to end with invariant.start, invariant.end and
; the branch on consecutive lines, i.e. with no load hoisted in between.
; CHECK-LABEL: @test_fence1(
; CHECK: entry:
; CHECK: invariant.start
; CHECK-NEXT: invariant.end
; CHECK-NEXT: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32*
  store atomic i32 5, i32* %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; Same as the test above, but instead of invariant.end, we have the result of
; invariant.start escaping through a call. We cannot hoist the load.
define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
; No load may appear in the entry block ahead of the branch into the loop.
; CHECK-LABEL: @test_fence2(
; CHECK: entry:
; CHECK-NOT: load
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32*
  store atomic i32 5, i32* %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  ; Passing %invst to an arbitrary call lets the invariant.start result escape.
  call void @escaping.invariant.start({}* %invst)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; FIXME: invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
; Consider the load operand addr.i being bitcast before it is passed to
; invariant.start.
define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
; The checks pin the current behavior: the addrld load does not appear in the
; entry block between invariant.start and the branch into the loop.
; CHECK-LABEL: @test_fence3(
; CHECK: entry:
; CHECK: invariant.start
; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
  %gep = bitcast i32* %addr.i to i8*
  store atomic i32 5, i32* %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We should not hoist the addrld out of the loop.
define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
; Here the store and the invariant.start call are both inside the loop body;
; the addrld load must not show up in the entry block.
; CHECK-LABEL: @test_fence4(
; CHECK: entry:
; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
  %gep = bitcast i32* %addr.i to i8*
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  store atomic i32 5, i32* %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}