; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -passes=early-cse | FileCheck %s

declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind

; invariant.start does not clobber memory, so the second load of %P must be
; forwarded from the first one and the difference folds to zero.
define i8 @test_bypass1(i8 *%P) {
; CHECK-LABEL: @test_bypass1(
; CHECK-NEXT: %V1 = load i8, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 0

  %V1 = load i8, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  %V2 = load i8, i8* %P
  %Diff = sub i8 %V1, %V2
  ret i8 %Diff
}


; Simple store-to-load forwarding across invariant.start: the load after the
; call must observe the stored constant 42.
define i8 @test_bypass2(i8 *%P) {
; CHECK-LABEL: @test_bypass2(
; CHECK-NEXT: store i8 42, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 42

  store i8 42, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  %V1 = load i8, i8* %P
  ret i8 %V1
}

; Dead-store elimination may look through invariant.start: the store that
; precedes the call is valid, whereas the store that follows it is effectively
; unreachable given the semantics of invariant.start.
define void @test_bypass3(i8* %P) {
; CHECK-LABEL: @test_bypass3(
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P

  store i8 50, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  store i8 60, i8* %P
  ret void
}


; FIXME: The first store could be removed here as well, because nothing reads
; %P inside the invariant region delimited by start and end.
define void @test_bypass4(i8* %P) {
; Both stores to %P are currently expected to survive, together with the
; invariant.start / invariant.end pair that sits between them.

; CHECK-LABEL: @test_bypass4(
; CHECK-NEXT: store i8 50, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P


  store i8 50, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
  store i8 60, i8* %P
  ret void
}


declare void @clobber()
declare {}* @llvm.invariant.start.p0i32(i64 %size, i32* nocapture %ptr)
declare void @llvm.invariant.end.p0i32({}*, i64, i32* nocapture) nounwind

; The invariant scope is opened before the first load, so the reload after the
; call to @clobber is expected to be forwarded and the result folds to 0.
define i32 @test_before_load(i32* %p) {
; CHECK-LABEL: @test_before_load(
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; The invariant scope is opened after the first load but before the call to
; @clobber; the reload is still expected to be forwarded.
define i32 @test_before_clobber(i32* %p) {
; CHECK-LABEL: @test_before_clobber(
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; A second invariant.start on the same location, issued after the call to
; @clobber, must not prevent the forwarding established by the first one.
define i32 @test_duplicate_scope(i32* %p) {
; CHECK-LABEL: @test_duplicate_scope(
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; A call to @clobber sits between invariant.start and the first load; the two
; loads are nevertheless expected to be commoned.
define i32 @test_unanalzyable_load(i32* %p) {
; CHECK-LABEL: @test_unanalzyable_load(
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Negative test: the call to @clobber happens before the invariant scope is
; opened, so the second load must not be forwarded from the first.
define i32 @test_negative_after_clobber(i32* %p) {
; CHECK-LABEL: @test_negative_after_clobber(
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  call void @clobber()
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; The invariant scope is opened before the branch, so the call to @clobber on
; the %taken path does not block forwarding into %merge.
define i32 @test_merge(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_merge(
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  br i1 %cnd, label %merge, label %taken

taken:
  call void @clobber()
  br label %merge
merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Negative test: the invariant scope is opened only in %merge, after one
; incoming path has already called @clobber, so no forwarding is expected.
define i32 @test_negative_after_mergeclobber(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_negative_after_mergeclobber(
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken

taken:
  call void @clobber()
  br label %merge
merge:
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; In theory, this version could work, but EarlyCSE is incapable of
; merging facts along distinct paths.
define i32 @test_false_negative_merge(i32* %p, i1 %cnd) {
; The invariant scope is opened only on the %taken path, ahead of the call to
; @clobber; the expected output keeps %sub, i.e. no forwarding into %merge.
; CHECK-LABEL: @test_false_negative_merge(
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken

taken:
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  br label %merge
merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; The invariant scope is opened in the entry block ahead of a call to @clobber
; and the first load; a further call to @clobber on the %taken path must not
; stop the load in %merge from being forwarded.
define i32 @test_merge_unanalyzable_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_merge_unanalyzable_load(
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken

taken:
  call void @clobber()
  br label %merge
merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Writing back the value just loaded from %p is expected to be removed when
; the invariant scope was opened before the load.
define void @test_dse_before_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_dse_before_load(
; CHECK-NOT: store
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  store i32 %v1, i32* %p
  ret void
}

; Same as the previous test, except that the invariant scope is opened after
; the load; the write-back is still expected to be removed.
define void @test_dse_after_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_dse_after_load(
; CHECK-NOT: store
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  store i32 %v1, i32* %p
  ret void
}


; In this case, we have a false negative since MemoryLocation is implicitly
; typed due to the use of a Value to represent the address.  Note that other
; passes will canonicalize away the bitcasts in this example.
define i32 @test_false_negative_types(i32* %p) {
; The second load reads the same address through a float* bitcast of %p; the
; expected output keeps %sub, i.e. the two loads are not commoned.
; CHECK-LABEL: @test_false_negative_types(
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %pf = bitcast i32* %p to float*
  %v2f = load float, float* %pf
  %v2 = bitcast float %v2f to i32
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Negative test: the invariant region is declared with size 3, while the
; loads are of type i32, so no forwarding across @clobber is expected.
define i32 @test_negative_size1(i32* %p) {
; CHECK-LABEL: @test_negative_size1(
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 3, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Negative test: the invariant region is declared with size 0, so no
; forwarding across @clobber is expected.
define i32 @test_negative_size2(i32* %p) {
; CHECK-LABEL: @test_negative_size2(
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 0, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Negative test: the scope is closed by invariant.end before either load, so
; the call to @clobber must block forwarding.
define i32 @test_negative_scope(i32* %p) {
; CHECK-LABEL: @test_negative_scope(
; CHECK: ret i32 %sub
  %scope = call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @llvm.invariant.end.p0i32({}* %scope, i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; Both loads sit between invariant.start and the matching invariant.end, yet
; the expected output still keeps %sub (a missed case, going by the test name).
define i32 @test_false_negative_scope(i32* %p) {
; CHECK-LABEL: @test_false_negative_scope(
; CHECK: ret i32 %sub
  %scope = call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  call void @llvm.invariant.end.p0i32({}* %scope, i64 4, i32* %p)
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; An invariant load de facto starts an invariant.start scope of the
; appropriate size.
define i32 @test_invariant_load_scope(i32* %p) {
; CHECK-LABEL: @test_invariant_load_scope(
; CHECK: ret i32 0
  %v1 = load i32, i32* %p, !invariant.load !{}
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}
