Home | History | Annotate | Download | only in GVN
      1 ; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
      4 target triple = "x86_64-apple-macosx10.7.0"
      5 
      6 @x = common global i32 0, align 4
      7 @y = common global i32 0, align 4
      8 
      9 ; GVN across unordered store (allowed)
     10 define i32 @test1() nounwind uwtable ssp {
; The intervening atomic store is only 'unordered', so it must not act as a
; barrier: GVN should forward %x to the second load of @y, producing add %x, %x.
     11 ; CHECK-LABEL: test1
     12 ; CHECK: add i32 %x, %x
     13 entry:
     14   %x = load i32, i32* @y
     15   store atomic i32 %x, i32* @x unordered, align 4
     16   %y = load i32, i32* @y
     17   %z = add i32 %x, %y
     18   ret i32 %z
     19 }
     20 
     21 ; GVN across unordered load (allowed)
     22 define i32 @test3() nounwind uwtable ssp {
; The unordered atomic load of @x must not block CSE of the two plain loads
; of @y; %z should be replaced by %x (hence add %x, %x in the output).
     23 ; CHECK-LABEL: test3
     24 ; CHECK: add i32 %x, %x
     25 entry:
     26   %x = load i32, i32* @y
     27   %y = load atomic i32, i32* @x unordered, align 4
     28   %z = load i32, i32* @y
     29   %a = add i32 %x, %z
     30   %b = add i32 %y, %a
     31   ret i32 %b
     32 }
     33 
     34 ; GVN load to unordered load (allowed)
     35 define i32 @test5() nounwind uwtable ssp {
; Forwarding FROM an unordered atomic load TO a later plain load of the same
; address is legal; %y should be replaced by %x.
     36 ; CHECK-LABEL: test5
     37 ; CHECK: add i32 %x, %x
     38 entry:
     39   %x = load atomic i32, i32* @x unordered, align 4
     40   %y = load i32, i32* @x
     41   %z = add i32 %x, %y
     42   ret i32 %z
     43 }
     44 
     45 ; GVN unordered load to load (unordered load must not be removed)
     46 define i32 @test6() nounwind uwtable ssp {
; Forwarding a plain (non-atomic) load's value to a later atomic load is not
; done, so the unordered load must survive the pass.
     47 ; CHECK-LABEL: test6
     48 ; CHECK: load atomic i32, i32* @x unordered
     49 entry:
     50   %x = load i32, i32* @x
     51   %x2 = load atomic i32, i32* @x unordered, align 4
     52   %x3 = add i32 %x, %x2
     53   ret i32 %x3
     54 }
     55 
     56 ; GVN across release-acquire pair (forbidden)
     57 define i32 @test7() nounwind uwtable ssp {
; The release store followed by the acquire load forms a synchronization
; barrier: @y may have changed, so the two loads of @y must NOT be merged
; (the test expects add %x, %y, i.e. both loads survive).
     58 ; CHECK-LABEL: test7
     59 ; CHECK: add i32 %x, %y
     60 entry:
     61   %x = load i32, i32* @y
     62   store atomic i32 %x, i32* @x release, align 4
     63   %w = load atomic i32, i32* @x acquire, align 4
     64   %y = load i32, i32* @y
     65   %z = add i32 %x, %y
     66   ret i32 %z
     67 }
     68 
     69 ; GVN across monotonic store (allowed)
     70 define i32 @test9() nounwind uwtable ssp {
; A monotonic store to a different address (@x) orders nothing else, so the
; second load of @y is redundant and should fold to %x.
     71 ; CHECK-LABEL: test9
     72 ; CHECK: add i32 %x, %x
     73 entry:
     74   %x = load i32, i32* @y
     75   store atomic i32 %x, i32* @x monotonic, align 4
     76   %y = load i32, i32* @y
     77   %z = add i32 %x, %y
     78   ret i32 %z
     79 }
     80 
     81 ; GVN of an unordered across monotonic load (not allowed)
     82 define i32 @test10() nounwind uwtable ssp {
; The monotonic load of @x acts as a clobber here; the two atomic loads of @y
; must both remain (output keeps add %x, %y rather than add %x, %x).
     83 ; CHECK-LABEL: test10
     84 ; CHECK: add i32 %x, %y
     85 entry:
     86   %x = load atomic i32, i32* @y unordered, align 4
     87   %clobber = load atomic i32, i32* @x monotonic, align 4
     88   %y = load atomic i32, i32* @y monotonic, align 4
     89   %z = add i32 %x, %y
     90   ret i32 %z
     91 }
     92 
     93 define i32 @PR22708(i1 %flag) {
; Regression test for PR22708: the acquire load in if.end must keep the store
; in if.then and the following load of @y from being merged/moved — both the
; store and the plain load must remain in the output, after the acquire load.
     94 ; CHECK-LABEL: PR22708
     95 entry:
     96   br i1 %flag, label %if.then, label %if.end
     97 
     98 if.then:
     99   store i32 43, i32* @y, align 4
    100 ; CHECK: store i32 43, i32* @y, align 4
    101   br label %if.end
    102 
    103 if.end:
    104   load atomic i32, i32* @x acquire, align 4
    105   %load = load i32, i32* @y, align 4
    106 ; CHECK: load atomic i32, i32* @x acquire, align 4
    107 ; CHECK: load i32, i32* @y, align 4
    108   ret i32 %load
    109 }
    110 
    111 ; CHECK-LABEL: @test12(
    112 ; Can't remove a load over an ordering barrier
; The seq_cst load of %P2 is a full ordering barrier, so the two plain loads
; of %P1 on either side of it must both remain.
    113 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
    114   %load0 = load i32, i32* %P1
    115   %1 = load atomic i32, i32* %P2 seq_cst, align 4
    116   %load1 = load i32, i32* %P1
    117   %sel = select i1 %B, i32 %load0, i32 %load1
    118   ret i32 %sel
    119   ; CHECK: load i32, i32* %P1
    120   ; CHECK: load i32, i32* %P1
    121 }
    122 
    123 ; CHECK-LABEL: @test13(
    124 ; atomic to non-atomic forwarding is legal
; %b should be replaced by %a, folding %res to a constant 0 return.
    125 define i32 @test13(i32* %P1) {
    126   %a = load atomic i32, i32* %P1 seq_cst, align 4
    127   %b = load i32, i32* %P1
    128   %res = sub i32 %a, %b
    129   ret i32 %res
    130   ; CHECK: load atomic i32, i32* %P1
    131   ; CHECK: ret i32 0
    132 }
    133 
    134 ; CHECK-LABEL: @test13b(
; Store-to-load forwarding from an unordered atomic store to a plain load:
; the stored 0 should be forwarded, so the function folds to ret i32 0.
    135 define i32 @test13b(i32* %P1) {
    136   store  atomic i32 0, i32* %P1 unordered, align 4
    137   %b = load i32, i32* %P1
    138   ret i32 %b
    139   ; CHECK: ret i32 0
    140 }
    141 
    142 ; CHECK-LABEL: @test14(
    143 ; atomic to unordered atomic forwarding is legal
; The seq_cst load's value may be forwarded to the weaker unordered load;
; only the seq_cst load remains and the function returns 0.
    144 define i32 @test14(i32* %P1) {
    145   %a = load atomic i32, i32* %P1 seq_cst, align 4
    146   %b = load atomic i32, i32* %P1 unordered, align 4
    147   %res = sub i32 %a, %b
    148   ret i32 %res
    149   ; CHECK: load atomic i32, i32* %P1 seq_cst
    150   ; CHECK-NEXT: ret i32 0
    151 }
    152 
    153 ; CHECK-LABEL: @test15(
    154 ; implementation restriction: can't forward to stronger
    155 ; than unordered
; Both loads are seq_cst (stronger than unordered), so no forwarding happens
; and both loads must remain in the output.
    156 define i32 @test15(i32* %P1, i32* %P2) {
    157   %a = load atomic i32, i32* %P1 seq_cst, align 4
    158   %b = load atomic i32, i32* %P1 seq_cst, align 4
    159   %res = sub i32 %a, %b
    160   ret i32 %res
    161   ; CHECK: load atomic i32, i32* %P1
    162   ; CHECK: load atomic i32, i32* %P1
    163 }
    164 
    165 ; CHECK-LABEL: @test16(
    166 ; forwarding non-atomic to atomic is wrong! (However,
    167 ; it would be legal to use the later value in place of the
    168 ; former in this particular example.  We just don't
    169 ; do that right now.)
; Both loads must remain: the plain load's value may not replace the
; unordered atomic load.
    170 define i32 @test16(i32* %P1, i32* %P2) {
    171   %a = load i32, i32* %P1, align 4
    172   %b = load atomic i32, i32* %P1 unordered, align 4
    173   %res = sub i32 %a, %b
    174   ret i32 %res
    175   ; CHECK: load i32, i32* %P1
    176   ; CHECK: load atomic i32, i32* %P1
    177 }
    178 
    179 ; CHECK-LABEL: @test16b(
; Same restriction for stores: a plain store's value may not be forwarded to
; a later unordered atomic load, so the atomic load must remain.
    180 define i32 @test16b(i32* %P1) {
    181   store i32 0, i32* %P1
    182   %b = load atomic i32, i32* %P1 unordered, align 4
    183   ret i32 %b
    184   ; CHECK: load atomic i32, i32* %P1
    185 }
    186 
    187 ; Can't DSE across a full fence
; The seq_cst store to %P2 acts as a fence, so the first store to %P1 is not
; dead and all three stores must survive.
    188 define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
    189 ; CHECK-LABEL: @fence_seq_cst_store(
    190 ; CHECK: store
    191 ; CHECK: store atomic
    192 ; CHECK: store
    193   store i32 0, i32* %P1, align 4
    194   store atomic i32 0, i32* %P2 seq_cst, align 4
    195   store i32 0, i32* %P1, align 4
    196   ret void
    197 }
    198 
    199 ; Can't DSE across a full fence
; An explicit seq_cst fence between the two identical stores keeps the first
; one from being eliminated as dead.
    200 define void @fence_seq_cst(i32* %P1, i32* %P2) {
    201 ; CHECK-LABEL: @fence_seq_cst(
    202 ; CHECK: store
    203 ; CHECK: fence seq_cst
    204 ; CHECK: store
    205   store i32 0, i32* %P1, align 4
    206   fence seq_cst
    207   store i32 0, i32* %P1, align 4
    208   ret void
    209 }
    210 
    211 ; Can't DSE across a full singlethread fence
; Even a singlethread-scoped seq_cst fence blocks dead-store elimination of
; the first store.
    212 define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
    213 ; CHECK-LABEL: @fence_seq_cst_st(
    214 ; CHECK: store
    215 ; CHECK: fence singlethread seq_cst
    216 ; CHECK: store
    217   store i32 0, i32* %P1, align 4
    218   fence singlethread seq_cst
    219   store i32 0, i32* %P1, align 4
    220   ret void
    221 }
    222 
    223 ; Can't DSE across a full fence
; An inline asm with 'sideeffect' is treated as an opaque barrier; the first
; store must not be eliminated across it.
    224 define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
    225 ; CHECK-LABEL: @fence_asm_sideeffect(
    226 ; CHECK: store
    227 ; CHECK: call void asm sideeffect
    228 ; CHECK: store
    229   store i32 0, i32* %P1, align 4
    230   call void asm sideeffect "", ""()
    231   store i32 0, i32* %P1, align 4
    232   ret void
    233 }
    234 
    235 ; Can't DSE across a full fence
; An inline asm with a "~{memory}" clobber is a compiler-level memory barrier;
; both stores must survive.
    236 define void @fence_asm_memory(i32* %P1, i32* %P2) {
    237 ; CHECK-LABEL: @fence_asm_memory(
    238 ; CHECK: store
    239 ; CHECK: call void asm
    240 ; CHECK: store
    241   store i32 0, i32* %P1, align 4
    242   call void asm "", "~{memory}"()
    243   store i32 0, i32* %P1, align 4
    244   ret void
    245 }
    246 
    247 ; Can't remove a volatile load
; The volatile load may not be replaced by the earlier plain load's value;
; both loads must remain.
    248 define i32 @volatile_load(i32* %P1, i32* %P2) {
    249   %a = load i32, i32* %P1, align 4
    250   %b = load volatile i32, i32* %P1, align 4
    251   %res = sub i32 %a, %b
    252   ret i32 %res
    253   ; CHECK-LABEL: @volatile_load(
    254   ; CHECK: load i32, i32* %P1
    255   ; CHECK: load volatile i32, i32* %P1
    256 }
    257 
    258 ; Can't remove redundant volatile loads
; Two back-to-back volatile loads of the same address may not be merged;
; both loads and the sub must remain.
    259 define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
    260   %a = load volatile i32, i32* %P1, align 4
    261   %b = load volatile i32, i32* %P1, align 4
    262   %res = sub i32 %a, %b
    263   ret i32 %res
    264   ; CHECK-LABEL: @redundant_volatile_load(
    265   ; CHECK: load volatile i32, i32* %P1
    266   ; CHECK: load volatile i32, i32* %P1
    267   ; CHECK: sub
    268 }
    269 
    270 ; Can't DSE a volatile store
; The volatile store may not be removed even though a later store overwrites
; the same address.
    271 define void @volatile_store(i32* %P1, i32* %P2) {
    272 ; CHECK-LABEL: @volatile_store(
    273 ; CHECK: store volatile
    274 ; CHECK: store
    275   store volatile i32 0, i32* %P1, align 4
    276   store i32 3, i32* %P1, align 4
    277   ret void
    278 }
    279 
    280 ; Can't DSE a redundant volatile store
; Two identical volatile stores must both remain; neither is "dead".
    281 define void @redundant_volatile_store(i32* %P1, i32* %P2) {
    282 ; CHECK-LABEL: @redundant_volatile_store(
    283 ; CHECK: store volatile
    284 ; CHECK: store volatile
    285   store volatile i32 0, i32* %P1, align 4
    286   store volatile i32 0, i32* %P1, align 4
    287   ret void
    288 }
    289 
    290 ; Can value forward from volatiles
; Forwarding FROM a volatile load TO a later plain load is legal: the plain
; load folds away and the function returns 0; the volatile load remains.
    291 define i32 @test20(i32* %P1, i32* %P2) {
    292   %a = load volatile i32, i32* %P1, align 4
    293   %b = load i32, i32* %P1, align 4
    294   %res = sub i32 %a, %b
    295   ret i32 %res
    296   ; CHECK-LABEL: @test20(
    297   ; CHECK: load volatile i32, i32* %P1
    298   ; CHECK: ret i32 0
    299 }
    300 
    301 ; We're currently conservative about widening
; An i32 load is NOT widened to cover the overlapping i64 load; both atomic
; loads are expected to remain.
    302 define i64 @widen1(i32* %P1) {
    303   ; CHECK-LABEL: @widen1(
    304   ; CHECK: load atomic i32, i32* %P1
    305   ; CHECK: load atomic i64, i64* %p2
    306   %p2 = bitcast i32* %P1 to i64*
    307   %a = load atomic i32, i32* %P1 unordered, align 4
    308   %b = load atomic i64, i64* %p2 unordered, align 4
    309   %a64 = sext i32 %a to i64
    310   %res = sub i64 %a64, %b
    311   ret i64 %res
    312 }
    313 
    314 ; narrowing does work
; The unordered i32 load can be satisfied from the wider unordered i64 load
; of the same address, so the i32 load should be eliminated (CHECK-NOT).
    315 define i64 @narrow(i32* %P1) {
    316   ; CHECK-LABEL: @narrow(
    317   ; CHECK: load atomic i64, i64* %p2
    318   ; CHECK-NOT: load atomic i32, i32* %P1
    319   %p2 = bitcast i32* %P1 to i64*
    320   %a64 = load atomic i64, i64* %p2 unordered, align 4
    321   %b = load atomic i32, i32* %P1 unordered, align 4
    322   %b64 = sext i32 %b to i64
    323   %res = sub i64 %a64, %b64
    324   ret i64 %res
    325 }
    326 
    327 ; Missed optimization, we don't yet optimize ordered loads
; Same shape as @narrow but with acquire ordering: narrowing is not performed
; for ordered atomics, so both loads remain.
    328 define i64 @narrow2(i32* %P1) {
    329   ; CHECK-LABEL: @narrow2(
    330   ; CHECK: load atomic i64, i64* %p2
    331   ; CHECK: load atomic i32, i32* %P1
    332   %p2 = bitcast i32* %P1 to i64*
    333   %a64 = load atomic i64, i64* %p2 acquire, align 4
    334   %b = load atomic i32, i32* %P1 acquire, align 4
    335   %b64 = sext i32 %b to i64
    336   %res = sub i64 %a64, %b64
    337   ret i64 %res
    338 }
    339 
    340 ; Note: The cross block FRE testing is deliberately light.  All of the tricky
    341 ; bits of legality are shared code with the block-local FRE above.  These
    342 ; are here only to show that we haven't obviously broken anything.
    343 
    344 ; unordered atomic to unordered atomic
; Cross-block full redundancy elimination: the load in %next is redundant
; with the dominating unordered load, so %res folds (in %next, %a is known 
; non-zero only on the other edge; both returns fold to 0 per the checks).
    345 define i32 @non_local_fre(i32* %P1) {
    346 ; CHECK-LABEL: @non_local_fre(
    347 ; CHECK: load atomic i32, i32* %P1
    348 ; CHECK: ret i32 0
    349 ; CHECK: ret i32 0
    350   %a = load atomic i32, i32* %P1 unordered, align 4
    351   %cmp = icmp eq i32 %a, 0
    352   br i1 %cmp, label %early, label %next
    353 early:
    354   ret i32 %a
    355 next:
    356   %b = load atomic i32, i32* %P1 unordered, align 4
    357   %res = sub i32 %a, %b
    358   ret i32 %res
    359 }
    360 
    361 ; unordered atomic to non-atomic
; Cross-block FRE where the redundant load in %next is non-atomic: forwarding
; from the dominating unordered atomic load is legal, so both paths fold to 0.
    362 define i32 @non_local_fre2(i32* %P1) {
    363 ; CHECK-LABEL: @non_local_fre2(
    364 ; CHECK: load atomic i32, i32* %P1
    365 ; CHECK: ret i32 0
    366 ; CHECK: ret i32 0
    367   %a = load atomic i32, i32* %P1 unordered, align 4
    368   %cmp = icmp eq i32 %a, 0
    369   br i1 %cmp, label %early, label %next
    370 early:
    371   ret i32 %a
    372 next:
    373   %b = load i32, i32* %P1
    374   %res = sub i32 %a, %b
    375   ret i32 %res
    376 }
    377 
    378 ; Can't forward ordered atomics.
; Both loads are acquire; cross-block forwarding of ordered atomics is not
; performed, so the second acquire load and the sub must remain (%res is
; returned unsimplified).
    379 define i32 @non_local_fre3(i32* %P1) {
    380 ; CHECK-LABEL: @non_local_fre3(
    381 ; CHECK: load atomic i32, i32* %P1 acquire
    382 ; CHECK: ret i32 0
    383 ; CHECK: load atomic i32, i32* %P1 acquire
    384 ; CHECK: ret i32 %res
    385   %a = load atomic i32, i32* %P1 acquire, align 4
    386   %cmp = icmp eq i32 %a, 0
    387   br i1 %cmp, label %early, label %next
    388 early:
    389   ret i32 %a
    390 next:
    391   %b = load atomic i32, i32* %P1 acquire, align 4
    392   %res = sub i32 %a, %b
    393   ret i32 %res
    394 }
    395 
    396 declare void @clobber()
    397 
    398 ; unordered atomic to unordered atomic
; Partial redundancy elimination: @clobber may write %P1 on the %early path,
; so PRE inserts a reload (%b.pre) there and merges it with %a via a phi in
; %next, removing the original load in %next.
    399 define i32 @non_local_pre(i32* %P1) {
    400 ; CHECK-LABEL: @non_local_pre(
    401 ; CHECK: load atomic i32, i32* %P1 unordered
    402 ; CHECK: load atomic i32, i32* %P1 unordered
    403 ; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
    404 ; CHECK: ret i32 %b
    405   %a = load atomic i32, i32* %P1 unordered, align 4
    406   %cmp = icmp eq i32 %a, 0
    407   br i1 %cmp, label %early, label %next
    408 early:
    409   call void @clobber()
    410   br label %next
    411 next:
    412   %b = load atomic i32, i32* %P1 unordered, align 4
    413   ret i32 %b
    414 }
    415 
    416 ; unordered atomic to non-atomic
; PRE with a non-atomic redundant load: legal to merge with the dominating
; unordered atomic load; the pre-inserted load on the %early edge is plain.
    417 define i32 @non_local_pre2(i32* %P1) {
    418 ; CHECK-LABEL: @non_local_pre2(
    419 ; CHECK: load atomic i32, i32* %P1 unordered
    420 ; CHECK: load i32, i32* %P1
    421 ; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
    422 ; CHECK: ret i32 %b
    423   %a = load atomic i32, i32* %P1 unordered, align 4
    424   %cmp = icmp eq i32 %a, 0
    425   br i1 %cmp, label %early, label %next
    426 early:
    427   call void @clobber()
    428   br label %next
    429 next:
    430   %b = load i32, i32* %P1
    431   ret i32 %b
    432 }
    433 
    434 ; non-atomic to unordered atomic - can't forward!
; The available value on the fall-through edge is a plain load; it may not
; feed the unordered atomic load in %next, so no PRE happens and both loads
; stay as written.
    435 define i32 @non_local_pre3(i32* %P1) {
    436 ; CHECK-LABEL: @non_local_pre3(
    437 ; CHECK: %a = load i32, i32* %P1
    438 ; CHECK: %b = load atomic i32, i32* %P1 unordered
    439 ; CHECK: ret i32 %b
    440   %a = load i32, i32* %P1
    441   %cmp = icmp eq i32 %a, 0
    442   br i1 %cmp, label %early, label %next
    443 early:
    444   call void @clobber()
    445   br label %next
    446 next:
    447   %b = load atomic i32, i32* %P1 unordered, align 4
    448   ret i32 %b
    449 }
    450 
    451 ; ordered atomic to ordered atomic - can't forward
; Both loads are seq_cst; PRE does not touch ordered atomics, so both loads
; remain unchanged.
    452 define i32 @non_local_pre4(i32* %P1) {
    453 ; CHECK-LABEL: @non_local_pre4(
    454 ; CHECK: %a = load atomic i32, i32* %P1 seq_cst
    455 ; CHECK: %b = load atomic i32, i32* %P1 seq_cst
    456 ; CHECK: ret i32 %b
    457   %a = load atomic i32, i32* %P1 seq_cst, align 4
    458   %cmp = icmp eq i32 %a, 0
    459   br i1 %cmp, label %early, label %next
    460 early:
    461   call void @clobber()
    462   br label %next
    463 next:
    464   %b = load atomic i32, i32* %P1 seq_cst, align 4
    465   ret i32 %b
    466 }
    467 
    468 ; can't remove volatile on any path
; The load in %next is volatile, so PRE may not remove or replace it
; regardless of the dominating atomic load.
    469 define i32 @non_local_pre5(i32* %P1) {
    470 ; CHECK-LABEL: @non_local_pre5(
    471 ; CHECK: %a = load atomic i32, i32* %P1 seq_cst
    472 ; CHECK: %b = load volatile i32, i32* %P1
    473 ; CHECK: ret i32 %b
    474   %a = load atomic i32, i32* %P1 seq_cst, align 4
    475   %cmp = icmp eq i32 %a, 0
    476   br i1 %cmp, label %early, label %next
    477 early:
    478   call void @clobber()
    479   br label %next
    480 next:
    481   %b = load volatile i32, i32* %P1
    482   ret i32 %b
    483 }
    484 
    485 
    486 ; ordered atomic to unordered atomic
    487 define i32 @non_local_pre6(i32* %P1) {
    488 ; CHECK-LABEL: @non_local_pre6(
    489 ; CHECK: load atomic i32, i32* %P1 seq_cst
    490 ; CHECK: load atomic i32, i32* %P1 unordered
    491 ; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
    492 ; CHECK: ret i32 %b
    493   %a = load atomic i32, i32* %P1 seq_cst, align 4
    494   %cmp = icmp eq i32 %a, 0
    495   br i1 %cmp, label %early, label %next
    496 early:
    497   call void @clobber()
    498   br label %next
    499 next:
    500   %b = load atomic i32, i32* %P1 unordered, align 4
    501   ret i32 %b
    502 }
    503 
    504