; RUN: opt -basicaa -gvn -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = common global i32 0, align 4
@y = common global i32 0, align 4

; GVN across unordered store (allowed)
define i32 @test1() nounwind uwtable ssp {
; CHECK-LABEL: test1
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x unordered, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across unordered load (allowed)
define i32 @test3() nounwind uwtable ssp {
; CHECK-LABEL: test3
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  %y = load atomic i32, i32* @x unordered, align 4
  %z = load i32, i32* @y
  %a = add i32 %x, %z
  %b = add i32 %y, %a
  ret i32 %b
}

; GVN load to unordered load (allowed)
define i32 @test5() nounwind uwtable ssp {
; CHECK-LABEL: test5
; CHECK: add i32 %x, %x
entry:
  %x = load atomic i32, i32* @x unordered, align 4
  %y = load i32, i32* @x
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN unordered load to load (unordered load must not be removed)
define i32 @test6() nounwind uwtable ssp {
; CHECK-LABEL: test6
; CHECK: load atomic i32, i32* @x unordered
entry:
  %x = load i32, i32* @x
  %x2 = load atomic i32, i32* @x unordered, align 4
  %x3 = add i32 %x, %x2
  ret i32 %x3
}

; GVN across release-acquire pair (forbidden)
define i32 @test7() nounwind uwtable ssp {
; CHECK-LABEL: test7
; CHECK: add i32 %x, %y
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x release, align 4
  %w = load atomic i32, i32* @x acquire, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across monotonic store (allowed)
define i32 @test9() nounwind uwtable ssp {
; CHECK-LABEL: test9
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x monotonic, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN of an unordered load across a monotonic load (not allowed)
define i32 @test10() nounwind uwtable ssp {
; CHECK-LABEL: test10
; CHECK: add i32 %x, %y
entry:
  %x = load atomic i32, i32* @y unordered, align 4
  %clobber = load atomic i32, i32* @x monotonic, align 4
  %y = load atomic i32, i32* @y monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

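; PR22708: load PRE must not move the load of @y across the acquire load;
; the store, the acquire load, and the load of @y must all remain.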
define i32 @PR22708(i1 %flag) {
; CHECK-LABEL: PR22708
entry:
  br i1 %flag, label %if.then, label %if.end

if.then:
  store i32 43, i32* @y, align 4
; CHECK: store i32 43, i32* @y, align 4
  br label %if.end

if.end:
  load atomic i32, i32* @x acquire, align 4
  %load = load i32, i32* @y, align 4
; CHECK: load atomic i32, i32* @x acquire, align 4
; CHECK: load i32, i32* @y, align 4
  ret i32 %load
}

; CHECK-LABEL: @test12(
; Can't remove a load over an ordering barrier
define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
  %load0 = load i32, i32* %P1
  %1 = load atomic i32, i32* %P2 seq_cst, align 4
  %load1 = load i32, i32* %P1
  %sel = select i1 %B, i32 %load0, i32 %load1
  ret i32 %sel
; CHECK: load i32, i32* %P1
; CHECK: load i32, i32* %P1
}

; CHECK-LABEL: @test13(
; atomic to non-atomic forwarding is legal
define i32 @test13(i32* %P1) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load i32, i32* %P1
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
}

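; unordered atomic store to non-atomic load forwarding is legal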
; CHECK-LABEL: @test13b(
define i32 @test13b(i32* %P1) {
  store atomic i32 0, i32* %P1 unordered, align 4
  %b = load i32, i32* %P1
  ret i32 %b
; CHECK: ret i32 0
}

; CHECK-LABEL: @test14(
; atomic to unordered atomic forwarding is legal
define i32 @test14(i32* %P1) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK: load atomic i32, i32* %P1 seq_cst
; CHECK-NEXT: ret i32 0
}

; CHECK-LABEL: @test15(
; implementation restriction: can't forward to anything stronger
; than unordered
define i32 @test15(i32* %P1, i32* %P2) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load atomic i32, i32* %P1 seq_cst, align 4
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK: load atomic i32, i32* %P1
; CHECK: load atomic i32, i32* %P1
}

; CHECK-LABEL: @test16(
; forwarding non-atomic to atomic is wrong! (However,
; it would be legal to use the later value in place of the
; former in this particular example. We just don't
; do that right now.)
define i32 @test16(i32* %P1, i32* %P2) {
  %a = load i32, i32* %P1, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK: load i32, i32* %P1
; CHECK: load atomic i32, i32* %P1
}

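; non-atomic store to unordered atomic load forwarding is likewise not
; performed (same restriction as @test16)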
; CHECK-LABEL: @test16b(
define i32 @test16b(i32* %P1) {
  store i32 0, i32* %P1
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
; CHECK: load atomic i32, i32* %P1
}

; Can't DSE across a full fence
define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst_store(
; CHECK: store
; CHECK: store atomic
; CHECK: store
  store i32 0, i32* %P1, align 4
  store atomic i32 0, i32* %P2 seq_cst, align 4
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_seq_cst(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst(
; CHECK: store
; CHECK: fence seq_cst
; CHECK: store
  store i32 0, i32* %P1, align 4
  fence seq_cst
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full singlethread fence
define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst_st(
; CHECK: store
; CHECK: fence singlethread seq_cst
; CHECK: store
  store i32 0, i32* %P1, align 4
  fence singlethread seq_cst
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across inline asm with side effects
define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_asm_sideeffect(
; CHECK: store
; CHECK: call void asm sideeffect
; CHECK: store
  store i32 0, i32* %P1, align 4
  call void asm sideeffect "", ""()
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across inline asm that clobbers memory
define void @fence_asm_memory(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_asm_memory(
; CHECK: store
; CHECK: call void asm
; CHECK: store
  store i32 0, i32* %P1, align 4
  call void asm "", "~{memory}"()
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't remove a volatile load
define i32 @volatile_load(i32* %P1, i32* %P2) {
  %a = load i32, i32* %P1, align 4
  %b = load volatile i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK-LABEL: @volatile_load(
; CHECK: load i32, i32* %P1
; CHECK: load volatile i32, i32* %P1
}

; Can't remove redundant volatile loads
define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
  %a = load volatile i32, i32* %P1, align 4
  %b = load volatile i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK-LABEL: @redundant_volatile_load(
; CHECK: load volatile i32, i32* %P1
; CHECK: load volatile i32, i32* %P1
; CHECK: sub
}

; Can't DSE a volatile store
define void @volatile_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @volatile_store(
; CHECK: store volatile
; CHECK: store
  store volatile i32 0, i32* %P1, align 4
  store i32 3, i32* %P1, align 4
  ret void
}

; Can't DSE a redundant volatile store
define void @redundant_volatile_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @redundant_volatile_store(
; CHECK: store volatile
; CHECK: store volatile
  store volatile i32 0, i32* %P1, align 4
  store volatile i32 0, i32* %P1, align 4
  ret void
}

; We can value-forward from volatile loads
define i32 @test20(i32* %P1, i32* %P2) {
  %a = load volatile i32, i32* %P1, align 4
  %b = load i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
; CHECK-LABEL: @test20(
; CHECK: load volatile i32, i32* %P1
; CHECK: ret i32 0
}

; We're currently conservative about widening
define i64 @widen1(i32* %P1) {
; CHECK-LABEL: @widen1(
; CHECK: load atomic i32, i32* %P1
; CHECK: load atomic i64, i64* %p2
  %p2 = bitcast i32* %P1 to i64*
  %a = load atomic i32, i32* %P1 unordered, align 4
  %b = load atomic i64, i64* %p2 unordered, align 4
  %a64 = sext i32 %a to i64
  %res = sub i64 %a64, %b
  ret i64 %res
}

; narrowing does work
define i64 @narrow(i32* %P1) {
; CHECK-LABEL: @narrow(
; CHECK: load atomic i64, i64* %p2
; CHECK-NOT: load atomic i32, i32* %P1
  %p2 = bitcast i32* %P1 to i64*
  %a64 = load atomic i64, i64* %p2 unordered, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Missed optimization: we don't yet optimize ordered loads
define i64 @narrow2(i32* %P1) {
; CHECK-LABEL: @narrow2(
; CHECK: load atomic i64, i64* %p2
; CHECK: load atomic i32, i32* %P1
  %p2 = bitcast i32* %P1 to i64*
  %a64 = load atomic i64, i64* %p2 acquire, align 4
  %b = load atomic i32, i32* %P1 acquire, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Note: The cross-block FRE testing is deliberately light. All of the tricky
; bits of legality are shared code with the block-local FRE above. These
; are here only to show that we haven't obviously broken anything.

; unordered atomic to unordered atomic
define i32 @non_local_fre(i32* %P1) {
; CHECK-LABEL: @non_local_fre(
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
; CHECK: ret i32 0
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; unordered atomic to non-atomic
define i32 @non_local_fre2(i32* %P1) {
; CHECK-LABEL: @non_local_fre2(
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
; CHECK: ret i32 0
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load i32, i32* %P1
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't forward ordered atomics.
define i32 @non_local_fre3(i32* %P1) {
; CHECK-LABEL: @non_local_fre3(
; CHECK: load atomic i32, i32* %P1 acquire
; CHECK: ret i32 0
; CHECK: load atomic i32, i32* %P1 acquire
; CHECK: ret i32 %res
  %a = load atomic i32, i32* %P1 acquire, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, i32* %P1 acquire, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

declare void @clobber()

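; In the PRE tests below, the load in %next is available from %a on one
; path and clobbered by @clobber() on the other; where the transform is
; legal, PRE inserts a load in %early and merges it with %a via a phi
; (see the %b.pre CHECK lines).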
; unordered atomic to unordered atomic
define i32 @non_local_pre(i32* %P1) {
; CHECK-LABEL: @non_local_pre(
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}

; unordered atomic to non-atomic
define i32 @non_local_pre2(i32* %P1) {
; CHECK-LABEL: @non_local_pre2(
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: load i32, i32* %P1
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load i32, i32* %P1
  ret i32 %b
}

; non-atomic to unordered atomic - can't forward!
define i32 @non_local_pre3(i32* %P1) {
; CHECK-LABEL: @non_local_pre3(
; CHECK: %a = load i32, i32* %P1
; CHECK: %b = load atomic i32, i32* %P1 unordered
; CHECK: ret i32 %b
  %a = load i32, i32* %P1
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}

; ordered atomic to ordered atomic - can't forward
define i32 @non_local_pre4(i32* %P1) {
; CHECK-LABEL: @non_local_pre4(
; CHECK: %a = load atomic i32, i32* %P1 seq_cst
; CHECK: %b = load atomic i32, i32* %P1 seq_cst
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 seq_cst, align 4
  ret i32 %b
}

; can't remove volatile on any path
define i32 @non_local_pre5(i32* %P1) {
; CHECK-LABEL: @non_local_pre5(
; CHECK: %a = load atomic i32, i32* %P1 seq_cst
; CHECK: %b = load volatile i32, i32* %P1
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load volatile i32, i32* %P1
  ret i32 %b
}

; ordered atomic to unordered atomic
define i32 @non_local_pre6(i32* %P1) {
; CHECK-LABEL: @non_local_pre6(
; CHECK: load atomic i32, i32* %P1 seq_cst
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}