Home | History | Annotate | Download | only in GVN
      1 ; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s
      2 
      3 ; 32-bit little endian target.
      4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
      5 
      6 ;; Trivial RLE test.
      7 define i32 @test0(i32 %V, i32* %P) {  ; store followed by a reload of the same type through the same pointer
      8   store i32 %V, i32* %P
      9 
     10   %A = load i32* %P                    ; redundant reload: the checks below expect %V to be returned directly
     11   ret i32 %A
     12 ; CHECK: @test0
     13 ; CHECK: ret i32 %V
     14 }
     15 
     16 
     17 ;;===----------------------------------------------------------------------===;;
     18 ;; Tests for crashers
     19 ;;===----------------------------------------------------------------------===;;
     20 
     21 ;; PR5016
     22 define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {  ; no FileCheck directives here; presumably only needs opt to finish without crashing
     23   store {i32, i32} %A, {i32, i32}* %P   ; store of a whole struct value
     24   %X = bitcast {i32, i32}* %P to i8*
     25   %Y = load i8* %X                      ; i8 reload from the address the struct was just stored to
     26   ret i8 %Y
     27 }
     28 
     29 
     30 ;;===----------------------------------------------------------------------===;;
     31 ;; Store -> Load  and  Load -> Load forwarding where src and dst are different
     32 ;; types, but where the base pointer is a must alias.
     33 ;;===----------------------------------------------------------------------===;;
     34 
     35 ;; i32 -> f32 forwarding.
     36 define float @coerce_mustalias1(i32 %V, i32* %P) {
     37   store i32 %V, i32* %P                 ; value is written as an i32
     38    
     39   %P2 = bitcast i32* %P to float*       ; same address, viewed as float*
     40 
     41   %A = load float* %P2                  ; reload as float (32 bits, like the stored i32); the checks require no load to remain
     42   ret float %A
     43 ; CHECK: @coerce_mustalias1
     44 ; CHECK-NOT: load
     45 ; CHECK: ret float 
     46 }
     47 
     48 ;; i32* -> float forwarding.
     49 define float @coerce_mustalias2(i32* %V, i32** %P) {
     50   store i32* %V, i32** %P               ; stored value is a pointer (32 bits per the p:32:32:32 datalayout)
     51    
     52   %P2 = bitcast i32** %P to float*      ; same address, viewed as float*
     53 
     54   %A = load float* %P2                  ; reload as float; the checks require no load to remain
     55   ret float %A
     56 ; CHECK: @coerce_mustalias2
     57 ; CHECK-NOT: load
     58 ; CHECK: ret float 
     59 }
     60 
     61 ;; float -> i32* forwarding.
     62 define i32* @coerce_mustalias3(float %V, float* %P) {
     63   store float %V, float* %P             ; value is written as a float
     64    
     65   %P2 = bitcast float* %P to i32**      ; same address, viewed as a pointer-to-pointer
     66 
     67   %A = load i32** %P2                   ; reload as i32*; the checks require no load to remain
     68   ret i32* %A
     69 ; CHECK: @coerce_mustalias3
     70 ; CHECK-NOT: load
     71 ; CHECK: ret i32* 
     72 }
     73 
     74 ;; i32 -> f32 load forwarding.
     75 define float @coerce_mustalias4(i32* %P, i1 %cond) {
     76   %A = load i32* %P                     ; first load; the checks expect this to be the only load left
     77   
     78   %P2 = bitcast i32* %P to float*
     79   %B = load float* %P2                  ; second load of the same address as a different type (load -> load case)
     80   br i1 %cond, label %T, label %F
     81 T:
     82   ret float %B                          ; uses the float-typed load
     83   
     84 F:
     85   %X = bitcast i32 %A to float          ; uses the i32-typed load, reinterpreted as float
     86   ret float %X
     87 
     88 ; CHECK: @coerce_mustalias4
     89 ; CHECK: %A = load i32* %P
     90 ; CHECK-NOT: load
     91 ; CHECK: ret float
     92 ; CHECK: F:
     93 }
     94 
     95 ;; i32 -> i8 forwarding
     96 define i8 @coerce_mustalias5(i32 %V, i32* %P) {
     97   store i32 %V, i32* %P                 ; 4-byte store
     98    
     99   %P2 = bitcast i32* %P to i8*          ; same address, viewed as i8*
    100 
    101   %A = load i8* %P2                     ; 1-byte reload at offset 0 of the stored i32; the checks require no load to remain
    102   ret i8 %A
    103 ; CHECK: @coerce_mustalias5
    104 ; CHECK-NOT: load
    105 ; CHECK: ret i8
    106 }
    107 
    108 ;; i64 -> float forwarding
    109 define float @coerce_mustalias6(i64 %V, i64* %P) {
    110   store i64 %V, i64* %P                 ; 8-byte store
    111    
    112   %P2 = bitcast i64* %P to float*       ; same address, viewed as float*
    113 
    114   %A = load float* %P2                  ; 4-byte float reload at offset 0 of the stored i64; the checks require no load to remain
    115   ret float %A
    116 ; CHECK: @coerce_mustalias6
    117 ; CHECK-NOT: load
    118 ; CHECK: ret float
    119 }
    120 
    121 ;; i64 -> i8* (32-bit) forwarding
    122 define i8* @coerce_mustalias7(i64 %V, i64* %P) {
    123   store i64 %V, i64* %P                 ; 8-byte store
    124    
    125   %P2 = bitcast i64* %P to i8**         ; same address, viewed as a pointer-to-pointer
    126 
    127   %A = load i8** %P2                    ; pointer reload (32-bit pointers per the datalayout); the checks require no load to remain
    128   ret i8* %A
    129 ; CHECK: @coerce_mustalias7
    130 ; CHECK-NOT: load
    131 ; CHECK: ret i8*
    132 }
    133 
    134 ; memset -> i16 forwarding.
    135 define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
    136 entry:
    137   %conv = bitcast i16* %A to i8* 
    138   tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)  ; fill 200 bytes starting at %A with the byte 0x01
    139   %arrayidx = getelementptr inbounds i16* %A, i64 42  ; element 42 = byte offset 84, inside the 200-byte fill
    140   %tmp2 = load i16* %arrayidx            ; two 0x01 bytes read as an i16 give 0x0101 = 257, the constant the checks expect
    141   ret i16 %tmp2
    142 ; CHECK: @memset_to_i16_local
    143 ; CHECK-NOT: load
    144 ; CHECK: ret i16 257
    145 }
    146 
    147 ; memset -> float forwarding.
    148 define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
    149 entry:
    150   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
    151   tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)  ; fill 400 bytes with the non-constant byte %Val
    152   %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]; element 42 = byte offset 168, inside the 400-byte fill
    153   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]; the checks expect a zext/shl/or/shl/or/bitcast sequence instead of this load
    154   ret float %tmp2
    155 ; CHECK: @memset_to_float_local
    156 ; CHECK-NOT: load
    157 ; CHECK: zext
    158 ; CHECK-NEXT: shl
    159 ; CHECK-NEXT: or
    160 ; CHECK-NEXT: shl
    161 ; CHECK-NEXT: or
    162 ; CHECK-NEXT: bitcast
    163 ; CHECK-NEXT: ret float
    164 }
    165 
    166 ;; non-local memset -> i16 load forwarding.
    167 define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
    168   %P3 = bitcast i16* %P to i8*
    169   br i1 %cond, label %T, label %F
    170 T:
    171   tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)  ; this path fills 400 bytes with 0x01
    172   br label %Cont
    173   
    174 F:
    175   tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)  ; this path fills 400 bytes with 0x02
    176   br label %Cont
    177 
    178 Cont:
    179   %P2 = getelementptr i16* %P, i32 4    ; element 4 = byte offset 8, inside both fills
    180   %A = load i16* %P2                    ; expected to become a phi of 0x0101 (257, from %T) and 0x0202 (514, from %F)
    181   ret i16 %A
    182 
    183 ; CHECK: @memset_to_i16_nonlocal0
    184 ; CHECK: Cont:
    185 ; CHECK-NEXT:   %A = phi i16 [ 514, %F ], [ 257, %T ]
    186 ; CHECK-NOT: load
    187 ; CHECK: ret i16 %A
    188 }
    189 
    190 @GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }
    191 
    192 ; memcpy -> float forwarding.
    193 define float @memcpy_to_float_local(float* %A) nounwind ssp {
    194 entry:
    195   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
    196   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)  ; copy all 12 bytes of the constant @GCst to %A
    197   %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]; byte offset 4, where the float field of @GCst lands
    198   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]; the checks expect the constant 14.0 from @GCst instead of a load
    199   ret float %tmp2
    200 ; CHECK: @memcpy_to_float_local
    201 ; CHECK-NOT: load
    202 ; CHECK: ret float 1.400000e+01
    203 }
    204 
    205 
    206 
    207 ;; non-local i32/float -> i8 load forwarding.
    208 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
    209   %P2 = bitcast i32* %P to float*       ; float view of %P, used by the store in %F
    210   %P3 = bitcast i32* %P to i8*          ; byte view of %P, used by the load in %Cont
    211   br i1 %cond, label %T, label %F
    212 T:
    213   store i32 42, i32* %P                 ; this path writes an i32
    214   br label %Cont
    215   
    216 F:
    217   store float 1.0, float* %P2           ; this path writes a float to the same address
    218   br label %Cont
    219 
    220 Cont:
    221   %A = load i8* %P3                     ; both predecessors stored here; the checks expect a phi named %A and no load
    222   ret i8 %A
    223 
    224 ; CHECK: @coerce_mustalias_nonlocal0
    225 ; CHECK: Cont:
    226 ; CHECK:   %A = phi i8 [
    227 ; CHECK-NOT: load
    228 ; CHECK: ret i8 %A
    229 }
    230 
    231 
    232 ;; non-local i32/float -> i8 load forwarding.  This also tests that the "P3"
    233 ;; bitcast equivalence can be properly phi translated.
    234 define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
    235   %P2 = bitcast i32* %P to float*       ; float view of %P, used by the store in %F
    236   br i1 %cond, label %T, label %F
    237 T:
    238   store i32 42, i32* %P                 ; this path writes an i32
    239   br label %Cont
    240   
    241 F:
    242   store float 1.0, float* %P2           ; this path writes a float to the same address
    243   br label %Cont
    244 
    245 Cont:
    246   %P3 = bitcast i32* %P to i8*          ; here the byte-view bitcast sits in the merge block, after both stores
    247   %A = load i8* %P3
    248   ret i8 %A
    249 
    250 ;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
    251 ;; bootstrap, see r82411
    252 ;
    253 ; HECK: @coerce_mustalias_nonlocal1
    254 ; HECK: Cont:
    255 ; HECK:   %A = phi i8 [
    256 ; HECK-NOT: load
    257 ; HECK: ret i8 %A
    258 }
    259 
    260 
    261 ;; non-local i32 -> i8 partial redundancy load forwarding.
    262 define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
    263   %P3 = bitcast i32* %P to i8*          ; byte view of %P, used by the load in %Cont
    264   br i1 %cond, label %T, label %F
    265 T:
    266   store i32 42, i32* %P                 ; only this path stores to %P
    267   br label %Cont
    268   
    269 F:
    270   br label %Cont                        ; no store on this path; the checks expect a load of %P3 to appear in this block
    271 
    272 Cont:
    273   %A = load i8* %P3                     ; available on one path only; expected to become a phi named %A
    274   ret i8 %A
    275 
    276 ; CHECK: @coerce_mustalias_pre0
    277 ; CHECK: F:
    278 ; CHECK:   load i8* %P3
    279 ; CHECK: Cont:
    280 ; CHECK:   %A = phi i8 [
    281 ; CHECK-NOT: load
    282 ; CHECK: ret i8 %A
    283 }
    284 
    285 ;;===----------------------------------------------------------------------===;;
    286 ;; Store -> Load  and  Load -> Load forwarding where src and dst are different
    287 ;; types, and the reload is an offset from the store pointer.
    288 ;;===----------------------------------------------------------------------===;;
    289 
    290 ;; i32 -> i8 forwarding.
    291 ;; PR4216
    292 define i8 @coerce_offset0(i32 %V, i32* %P) {
    293   store i32 %V, i32* %P                 ; 4-byte store at %P
    294    
    295   %P2 = bitcast i32* %P to i8*
    296   %P3 = getelementptr i8* %P2, i32 2    ; address of byte 2 within the stored i32
    297 
    298   %A = load i8* %P3                     ; 1-byte reload at an offset from the store pointer; the checks require no load to remain
    299   ret i8 %A
    300 ; CHECK: @coerce_offset0
    301 ; CHECK-NOT: load
    302 ; CHECK: ret i8
    303 }
    304 
    305 ;; non-local i32/float -> i8 load forwarding.
    306 define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
    307   %P2 = bitcast i32* %P to float*       ; float view of %P, used by the store in %F
    308   %P3 = bitcast i32* %P to i8*
    309   %P4 = getelementptr i8* %P3, i32 2    ; address of byte 2 within the 4 bytes at %P
    310   br i1 %cond, label %T, label %F
    311 T:
    312   store i32 42, i32* %P                 ; this path writes an i32
    313   br label %Cont
    314   
    315 F:
    316   store float 1.0, float* %P2           ; this path writes a float to the same address
    317   br label %Cont
    318 
    319 Cont:
    320   %A = load i8* %P4                     ; byte 2 of whichever value was stored; the checks expect a phi named %A and no load
    321   ret i8 %A
    322 
    323 ; CHECK: @coerce_offset_nonlocal0
    324 ; CHECK: Cont:
    325 ; CHECK:   %A = phi i8 [
    326 ; CHECK-NOT: load
    327 ; CHECK: ret i8 %A
    328 }
    329 
    330 
    331 ;; non-local i32 -> i8 partial redundancy load forwarding.
    332 define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
    333   %P3 = bitcast i32* %P to i8*
    334   %P4 = getelementptr i8* %P3, i32 2    ; address of byte 2 within the 4 bytes at %P
    335   br i1 %cond, label %T, label %F
    336 T:
    337   store i32 42, i32* %P                 ; only this path stores to %P
    338   br label %Cont
    339   
    340 F:
    341   br label %Cont                        ; no store on this path; the checks expect a load of %P4 to appear in this block
    342 
    343 Cont:
    344   %A = load i8* %P4                     ; available on one path only; expected to become a phi named %A
    345   ret i8 %A
    346 
    347 ; CHECK: @coerce_offset_pre0
    348 ; CHECK: F:
    349 ; CHECK:   load i8* %P4
    350 ; CHECK: Cont:
    351 ; CHECK:   %A = phi i8 [
    352 ; CHECK-NOT: load
    353 ; CHECK: ret i8 %A
    354 }
    355 
    356 define i32 @chained_load(i32** %p) {  ; %p is loaded four times; the checks expect only the first load (%z) to remain
    357 block1:
    358   %A = alloca i32*
    359 
    360   %z = load i32** %p                    ; first load of %p
    361   store i32* %z, i32** %A               ; store into the local alloca between the loads
    362   br i1 true, label %block2, label %block3
    363 
    364 block2:
    365  %a = load i32** %p                     ; repeat load of %p
    366  br label %block4
    367 
    368 block3:
    369   %b = load i32** %p                    ; repeat load of %p
    370   br label %block4
    371 
    372 block4:
    373   %c = load i32** %p                    ; repeat load of %p after the merge
    374   %d = load i32* %c                     ; dependent load; expected to be rewritten to load through %z
    375   ret i32 %d
    376   
    377 ; CHECK: @chained_load
    378 ; CHECK: %z = load i32** %p
    379 ; CHECK-NOT: load
    380 ; CHECK: %d = load i32* %z
    381 ; CHECK-NEXT: ret i32 %d
    382 }
    383 
    384 
    385 declare i1 @cond() readonly
    386 declare i1 @cond2() readonly
    387 
    388 define i32 @phi_trans2() {
    389 ; CHECK: @phi_trans2
    390 entry:
    391   %P = alloca i32, i32 400              ; stack array of 400 i32 elements
    392   br label %F1
    393   
    394 F1:
    395   %A = phi i32 [1, %entry], [2, %F]     ; index is 1 on entry and 2 after a trip through %F
    396   %cond2 = call i1 @cond()
    397   br i1 %cond2, label %T1, label %TY
    398   
    399 T1:
    400   %P2 = getelementptr i32* %P, i32 %A   ; &P[A]
    401   %x = load i32* %P2                    ; the load under test; the checks require %x to still be what %TX returns
    402   %cond = call i1 @cond2()
    403   br i1 %cond, label %TX, label %F
    404   
    405 F:
    406   %P3 = getelementptr i32* %P, i32 2    ; &P[2]
    407   store i32 17, i32* %P3
    408   
    409   store i32 42, i32* %P2  ; Provides "P[A]".
    410   br label %F1
    411 
    412 TX:
    413   ; This load should not be compiled to 'ret i32 42'.  An overly clever
    414   ; implementation of GVN would see that we're returning 17 if the loop
    415   ; executes once or 42 if it executes more than that, but we'd have to do
    416   ; loop restructuring to expose this, and GVN shouldn't do this sort of CFG
    417   ; transformation.
    418   
    419 ; CHECK: TX:
    420 ; CHECK: ret i32 %x
    421   ret i32 %x
    422 TY:
    423   ret i32 0
    424 }
    425 
    426 define i32 @phi_trans3(i32* %p) {
    427 ; CHECK: @phi_trans3
    428 block1:
    429   br i1 true, label %block2, label %block3
    430 
    431 block2:
    432  store i32 87, i32* %p                  ; p[0] = 87 on this path
    433  br label %block4
    434 
    435 block3:
    436   %p2 = getelementptr i32* %p, i32 43
    437   store i32 97, i32* %p2                ; p[43] = 97 on this path
    438   br label %block4
    439 
    440 block4:
    441   %A = phi i32 [-1, %block2], [42, %block3]  ; chosen so that A+1 is 0 or 43, the index stored to on that path
    442   br i1 true, label %block5, label %exit
    443   
    444 ; CHECK: block4:
    445 ; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]  
    446 ; CHECK-NOT: load
    447 
    448 block5:
    449   %B = add i32 %A, 1                    ; index computed in a later block than the phi
    450   br i1 true, label %block6, label %exit
    451   
    452 block6:
    453   %C = getelementptr i32* %p, i32 %B    ; &p[A+1]
    454   br i1 true, label %block7, label %exit
    455   
    456 block7:
    457   %D = load i32* %C                     ; expected to be replaced by a phi of the stored constants placed in block4
    458   ret i32 %D
    459   
    460 ; CHECK: block7:
    461 ; CHECK-NEXT: ret i32 %D
    462 
    463 exit:
    464   ret i32 -1
    465 }
    466 
    467 define i8 @phi_trans4(i8* %p) {
    468 ; CHECK: @phi_trans4
    469 entry:
    470   %X3 = getelementptr i8* %p, i32 192
    471   store i8 192, i8* %X3                 ; p[192] written before the loop
    472   
    473   %X = getelementptr i8* %p, i32 4
    474   %Y = load i8* %X                      ; p[4], the value expected for %Y2 on the edge from %entry
    475   br label %loop
    476 
    477 loop:
    478   %i = phi i32 [4, %entry], [192, %loop]  ; index is 4 on entry, 192 on the back edge
    479   %X2 = getelementptr i8* %p, i32 %i
    480   %Y2 = load i8* %X2                    ; p[i]; expected to become a phi of %Y and the constant 0
    481   
    482 ; CHECK: loop:
    483 ; CHECK-NEXT: %Y2 = phi i8 [ %Y, %entry ], [ 0, %loop ]
    484 ; CHECK-NOT: load i8
    485   
    486   %cond = call i1 @cond2()
    487 
    488   %Z = bitcast i8 *%X3 to i32*
    489   store i32 0, i32* %Z                  ; 4-byte zero store starting at p[192], so p[192] is 0 on the back edge
    490   br i1 %cond, label %loop, label %out
    491   
    492 out:
    493   %R = add i8 %Y, %Y2
    494   ret i8 %R
    495 }
    496 
    497 define i8 @phi_trans5(i8* %p) {
    498 ; CHECK: @phi_trans5
    499 entry:
    500   
    501   %X4 = getelementptr i8* %p, i32 2
    502   store i8 19, i8* %X4                  ; p[2] = 19
    503   
    504   %X = getelementptr i8* %p, i32 4
    505   %Y = load i8* %X                      ; p[4]
    506   br label %loop
    507 
    508 loop:
    509   %i = phi i32 [4, %entry], [3, %cont]  ; index is 4 on entry, 3 on the back edge from %cont
    510   %X2 = getelementptr i8* %p, i32 %i
    511   %Y2 = load i8* %X2  ; Ensure this load is not being incorrectly replaced.
    512   %cond = call i1 @cond2()
    513   br i1 %cond, label %cont, label %out
    514 
    515 cont:
    516   %Z = getelementptr i8* %X2, i32 -1    ; one byte before the current p[i]
    517   %Z2 = bitcast i8 *%Z to i32*
    518   store i32 50462976, i32* %Z2  ;; (1 << 8) | (2 << 16) | (3 << 24)
    519 
    520 
    521 ; CHECK: store i32
    522 ; CHECK-NEXT: getelementptr i8* %p, i32 3
    523 ; CHECK-NEXT: load i8*
    524   br label %loop
    525   
    526 out:
    527   %R = add i8 %Y, %Y2
    528   ret i8 %R
    529 }
    530 
    531 
    532 ; PR6642
    533 define i32 @memset_to_load() nounwind readnone {
    534 entry:
    535   %x = alloca [256 x i32], align 4                ; <[256 x i32]*> [#uses=2]
    536   %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
    537   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)  ; zero all 1024 bytes (256 x 4) of the local array
    538   %arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
    539   %tmp1 = load i32* %arraydecay                   ; <i32> [#uses=1]; first element of the zeroed array, expected to fold to 0
    540   ret i32 %tmp1
    541 ; CHECK: @memset_to_load
    542 ; CHECK: ret i32 0
    543 }
    544 
    545 
    546 ;;===----------------------------------------------------------------------===;;
    547 ;; Load -> Load forwarding in partial alias case.
    548 ;;===----------------------------------------------------------------------===;;
    549 
    550 define i32 @load_load_partial_alias(i8* %P) nounwind ssp {  ; directives below use a prefix other than the default, so FileCheck as invoked by the RUN line ignores them
    551 entry:
    552   %0 = bitcast i8* %P to i32*
    553   %tmp2 = load i32* %0                  ; 4-byte load covering P[0..3]
    554   %add.ptr = getelementptr inbounds i8* %P, i64 1
    555   %tmp5 = load i8* %add.ptr             ; 1-byte load of P[1], which lies inside the i32 loaded above
    556   %conv = zext i8 %tmp5 to i32
    557   %add = add nsw i32 %tmp2, %conv
    558   ret i32 %add
    559 
    560 ; TEMPORARILYDISABLED: @load_load_partial_alias
    561 ; TEMPORARILYDISABLED: load i32*
    562 ; TEMPORARILYDISABLED-NOT: load
    563 ; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
    564 ; TEMPORARILYDISABLED-NOT: load
    565 ; TEMPORARILYDISABLED: trunc i32 {{.*}} to i8
    566 ; TEMPORARILYDISABLED-NOT: load
    567 ; TEMPORARILYDISABLED: ret i32
    568 }
    569 
    570 
    571 ; Cross block partial alias case.
    572 define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {  ; directives below use a prefix other than the default, so FileCheck as invoked by the RUN line ignores them
    573 entry:
    574   %xx = bitcast i8* %P to i32*
    575   %x1 = load i32* %xx, align 4          ; 4-byte load covering P[0..3]
    576   %cmp = icmp eq i32 %x1, 127
    577   br i1 %cmp, label %land.lhs.true, label %if.end
    578 
    579 land.lhs.true:                                    ; preds = %entry
    580   %arrayidx4 = getelementptr inbounds i8* %P, i64 1
    581   %tmp5 = load i8* %arrayidx4, align 1  ; 1-byte load of P[1] in a different block from the covering i32 load
    582   %conv6 = zext i8 %tmp5 to i32
    583   ret i32 %conv6
    584 
    585 if.end:
    586   ret i32 52
    587 ; TEMPORARILY_DISABLED: @load_load_partial_alias_cross_block
    588 ; TEMPORARILY_DISABLED: land.lhs.true:
    589 ; TEMPORARILY_DISABLED-NOT: load i8
    590 ; TEMPORARILY_DISABLED: ret i32 %conv6
    591 }
    592 
    593 
    594 ;;===----------------------------------------------------------------------===;;
    595 ;; Load Widening
    596 ;;===----------------------------------------------------------------------===;;
    597 
    598 %widening1 = type { i32, i8, i8, i8, i8 }
    599 
    600 @f = global %widening1 zeroinitializer, align 4
    601 
    602 define i32 @test_widening1(i8* %P) nounwind ssp noredzone {  ; two adjacent i8 fields of @f are loaded; %P is unused
    603 entry:
    604   %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4  ; field 1: first i8 after the leading i32
    605   %conv = zext i8 %tmp to i32
    606   %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1  ; field 2: the next i8
    607   %conv2 = zext i8 %tmp1 to i32
    608   %add = add nsw i32 %conv, %conv2
    609   ret i32 %add
    610 ; The two byte loads are expected to be replaced by exactly one i16 load,
    611 ; CHECK: @test_widening1
    612 ; CHECK-NOT: load
    613 ; CHECK: load i16*
    614 ; CHECK-NOT: load
    615 ; CHECK: ret i32
    616 }
    616 
    617 define i32 @test_widening2() nounwind ssp noredzone {  ; all four i8 fields of @f are loaded
    618 entry:
    619   %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4  ; field 1: first i8 after the leading i32
    620   %conv = zext i8 %tmp to i32
    621   %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1  ; field 2
    622   %conv2 = zext i8 %tmp1 to i32
    623   %add = add nsw i32 %conv, %conv2
    624 
    625   %tmp2 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2  ; field 3
    626   %conv3 = zext i8 %tmp2 to i32
    627   %add2 = add nsw i32 %add, %conv3
    628 
    629   %tmp3 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1  ; field 4
    630   %conv4 = zext i8 %tmp3 to i32
    631   %add3 = add nsw i32 %add2, %conv3  ; NOTE(review): adds %conv3 again and leaves %conv4 unused - looks like a typo for %conv4; confirm before changing the IR
    632 
    633   ret i32 %add3
    634 ; CHECK: @test_widening2
    635 ; CHECK-NOT: load
    636 ; CHECK: load i32*
    637 ; CHECK-NOT: load
    638 ; CHECK: ret i32
    639 }
    640 
    641 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
    642 
    643 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
    644 
    645