; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
; FileCheck tests that exercise GVN with load PRE enabled (-enable-load-pre).
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
      3 
      4 define i32 @test1(i32* %p, i1 %C) {
      5 ; CHECK: @test1
      6 block1:
      7 	br i1 %C, label %block2, label %block3
      8 
      9 block2:
     10  br label %block4
     11 ; CHECK: block2:
     12 ; CHECK-NEXT: load i32* %p
     13 
     14 block3:
     15   store i32 0, i32* %p
     16   br label %block4
     17 
     18 block4:
     19   %PRE = load i32* %p
     20   ret i32 %PRE
     21 ; CHECK: block4:
     22 ; CHECK-NEXT: phi i32
     23 ; CHECK-NEXT: ret i32
     24 }
     25 
     26 ; This is a simple phi translation case.
     27 define i32 @test2(i32* %p, i32* %q, i1 %C) {
     28 ; CHECK: @test2
     29 block1:
     30 	br i1 %C, label %block2, label %block3
     31 
     32 block2:
     33  br label %block4
     34 ; CHECK: block2:
     35 ; CHECK-NEXT: load i32* %q
     36 
     37 block3:
     38   store i32 0, i32* %p
     39   br label %block4
     40 
     41 block4:
     42   %P2 = phi i32* [%p, %block3], [%q, %block2]
     43   %PRE = load i32* %P2
     44   ret i32 %PRE
     45 ; CHECK: block4:
     46 ; CHECK-NEXT: phi i32 [
     47 ; CHECK-NOT: load
     48 ; CHECK: ret i32
     49 }
     50 
     51 ; This is a PRE case that requires phi translation through a GEP.
     52 define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
     53 ; CHECK: @test3
     54 block1:
     55   %B = getelementptr i32* %q, i32 1
     56   store i32* %B, i32** %Hack
     57 	br i1 %C, label %block2, label %block3
     58 
     59 block2:
     60  br label %block4
     61 ; CHECK: block2:
     62 ; CHECK-NEXT: load i32* %B
     63 
     64 block3:
     65   %A = getelementptr i32* %p, i32 1
     66   store i32 0, i32* %A
     67   br label %block4
     68 
     69 block4:
     70   %P2 = phi i32* [%p, %block3], [%q, %block2]
     71   %P3 = getelementptr i32* %P2, i32 1
     72   %PRE = load i32* %P3
     73   ret i32 %PRE
     74 ; CHECK: block4:
     75 ; CHECK-NEXT: phi i32 [
     76 ; CHECK-NOT: load
     77 ; CHECK: ret i32
     78 }
     79 
     80 ;; Here the loaded address is available, but the computation is in 'block3'
     81 ;; which does not dominate 'block2'.
     82 define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
     83 ; CHECK: @test4
     84 block1:
     85 	br i1 %C, label %block2, label %block3
     86 
     87 block2:
     88  br label %block4
     89 ; CHECK: block2:
     90 ; CHECK:   load i32*
     91 ; CHECK:   br label %block4
     92 
     93 block3:
     94   %B = getelementptr i32* %q, i32 1
     95   store i32* %B, i32** %Hack
     96 
     97   %A = getelementptr i32* %p, i32 1
     98   store i32 0, i32* %A
     99   br label %block4
    100 
    101 block4:
    102   %P2 = phi i32* [%p, %block3], [%q, %block2]
    103   %P3 = getelementptr i32* %P2, i32 1
    104   %PRE = load i32* %P3
    105   ret i32 %PRE
    106 ; CHECK: block4:
    107 ; CHECK-NEXT: phi i32 [
    108 ; CHECK-NOT: load
    109 ; CHECK: ret i32
    110 }
    111 
    112 ;void test5(int N, double *G) {
    113 ;  int j;
    114 ;  for (j = 0; j < N - 1; j++)
    115 ;    G[j] = G[j] + G[j+1];
    116 ;}
    117 
    118 define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
    119 ; CHECK: @test5
    120 entry:
    121   %0 = add i32 %N, -1           
    122   %1 = icmp sgt i32 %0, 0       
    123   br i1 %1, label %bb.nph, label %return
    124 
    125 bb.nph:                         
    126   %tmp = zext i32 %0 to i64     
    127   br label %bb
    128 
    129 ; CHECK: bb.nph:
    130 ; CHECK: load double*
    131 ; CHECK: br label %bb
    132 
    133 bb:             
    134   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
    135   %tmp6 = add i64 %indvar, 1                    
    136   %scevgep = getelementptr double* %G, i64 %tmp6
    137   %scevgep7 = getelementptr double* %G, i64 %indvar
    138   %2 = load double* %scevgep7, align 8
    139   %3 = load double* %scevgep, align 8 
    140   %4 = fadd double %2, %3             
    141   store double %4, double* %scevgep7, align 8
    142   %exitcond = icmp eq i64 %tmp6, %tmp 
    143   br i1 %exitcond, label %return, label %bb
    144 
    145 ; Should only be one load in the loop.
    146 ; CHECK: bb:
    147 ; CHECK: load double*
    148 ; CHECK-NOT: load double*
    149 ; CHECK: br i1 %exitcond
    150 
    151 return:                               
    152   ret void
    153 }
    154 
    155 ;void test6(int N, double *G) {
    156 ;  int j;
    157 ;  for (j = 0; j < N - 1; j++)
    158 ;    G[j+1] = G[j] + G[j+1];
    159 ;}
    160 
    161 define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
    162 ; CHECK: @test6
    163 entry:
    164   %0 = add i32 %N, -1           
    165   %1 = icmp sgt i32 %0, 0       
    166   br i1 %1, label %bb.nph, label %return
    167 
    168 bb.nph:                         
    169   %tmp = zext i32 %0 to i64     
    170   br label %bb
    171 
    172 ; CHECK: bb.nph:
    173 ; CHECK: load double*
    174 ; CHECK: br label %bb
    175 
    176 bb:             
    177   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
    178   %tmp6 = add i64 %indvar, 1                    
    179   %scevgep = getelementptr double* %G, i64 %tmp6
    180   %scevgep7 = getelementptr double* %G, i64 %indvar
    181   %2 = load double* %scevgep7, align 8
    182   %3 = load double* %scevgep, align 8 
    183   %4 = fadd double %2, %3             
    184   store double %4, double* %scevgep, align 8
    185   %exitcond = icmp eq i64 %tmp6, %tmp 
    186   br i1 %exitcond, label %return, label %bb
    187 
    188 ; Should only be one load in the loop.
    189 ; CHECK: bb:
    190 ; CHECK: load double*
    191 ; CHECK-NOT: load double*
    192 ; CHECK: br i1 %exitcond
    193 
    194 return:                               
    195   ret void
    196 }
    197 
    198 ;void test7(int N, double* G) {
    199 ;  long j;
    200 ;  G[1] = 1;
    201 ;  for (j = 1; j < N - 1; j++)
    202 ;      G[j+1] = G[j] + G[j+1];
    203 ;}
    204 
    205 ; This requires phi translation of the adds.
    206 define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
    207 entry:
    208   %0 = getelementptr inbounds double* %G, i64 1   
    209   store double 1.000000e+00, double* %0, align 8
    210   %1 = add i32 %N, -1                             
    211   %2 = icmp sgt i32 %1, 1                         
    212   br i1 %2, label %bb.nph, label %return
    213 
    214 bb.nph:                                           
    215   %tmp = sext i32 %1 to i64                       
    216   %tmp7 = add i64 %tmp, -1                        
    217   br label %bb
    218 
    219 bb:                                               
    220   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ] 
    221   %tmp8 = add i64 %indvar, 2                      
    222   %scevgep = getelementptr double* %G, i64 %tmp8  
    223   %tmp9 = add i64 %indvar, 1                      
    224   %scevgep10 = getelementptr double* %G, i64 %tmp9 
    225   %3 = load double* %scevgep10, align 8           
    226   %4 = load double* %scevgep, align 8             
    227   %5 = fadd double %3, %4                         
    228   store double %5, double* %scevgep, align 8
    229   %exitcond = icmp eq i64 %tmp9, %tmp7            
    230   br i1 %exitcond, label %return, label %bb
    231 
    232 ; Should only be one load in the loop.
    233 ; CHECK: bb:
    234 ; CHECK: load double*
    235 ; CHECK-NOT: load double*
    236 ; CHECK: br i1 %exitcond
    237 
    238 return:                                           
    239   ret void
    240 }
    241 
    242 ;; Here the loaded address isn't available in 'block2' at all, requiring a new
    243 ;; GEP to be inserted into it.
    244 define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
    245 ; CHECK: @test8
    246 block1:
    247 	br i1 %C, label %block2, label %block3
    248 
    249 block2:
    250  br label %block4
    251 ; CHECK: block2:
    252 ; CHECK:   load i32*
    253 ; CHECK:   br label %block4
    254 
    255 block3:
    256   %A = getelementptr i32* %p, i32 1
    257   store i32 0, i32* %A
    258   br label %block4
    259 
    260 block4:
    261   %P2 = phi i32* [%p, %block3], [%q, %block2]
    262   %P3 = getelementptr i32* %P2, i32 1
    263   %PRE = load i32* %P3
    264   ret i32 %PRE
    265 ; CHECK: block4:
    266 ; CHECK-NEXT: phi i32 [
    267 ; CHECK-NOT: load
    268 ; CHECK: ret i32
    269 }
    270 
    271 ;void test9(int N, double* G) {
    272 ;  long j;
    273 ;  for (j = 1; j < N - 1; j++)
    274 ;      G[j+1] = G[j] + G[j+1];
    275 ;}
    276 
    277 ; This requires phi translation of the adds.
    278 define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
    279 entry:
    280   add i32 0, 0
    281   %1 = add i32 %N, -1                             
    282   %2 = icmp sgt i32 %1, 1                         
    283   br i1 %2, label %bb.nph, label %return
    284 
    285 bb.nph:                                           
    286   %tmp = sext i32 %1 to i64                       
    287   %tmp7 = add i64 %tmp, -1                        
    288   br label %bb
    289 
    290 ; CHECK: bb.nph:
    291 ; CHECK:   load double*
    292 ; CHECK:   br label %bb
    293 
    294 bb:                                               
    295   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ] 
    296   %tmp8 = add i64 %indvar, 2                      
    297   %scevgep = getelementptr double* %G, i64 %tmp8  
    298   %tmp9 = add i64 %indvar, 1                      
    299   %scevgep10 = getelementptr double* %G, i64 %tmp9 
    300   %3 = load double* %scevgep10, align 8           
    301   %4 = load double* %scevgep, align 8             
    302   %5 = fadd double %3, %4                         
    303   store double %5, double* %scevgep, align 8
    304   %exitcond = icmp eq i64 %tmp9, %tmp7            
    305   br i1 %exitcond, label %return, label %bb
    306 
    307 ; Should only be one load in the loop.
    308 ; CHECK: bb:
    309 ; CHECK: load double*
    310 ; CHECK-NOT: load double*
    311 ; CHECK: br i1 %exitcond
    312 
    313 return:                                           
    314   ret void
    315 }
    316 
    317 ;void test10(int N, double* G) {
    318 ;  long j;
    319 ;  for (j = 1; j < N - 1; j++)
    320 ;      G[j] = G[j] + G[j+1] + G[j-1];
    321 ;}
    322 
    323 ; PR5501
    324 define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
    325 entry:
    326   %0 = add i32 %N, -1
    327   %1 = icmp sgt i32 %0, 1
    328   br i1 %1, label %bb.nph, label %return
    329 
    330 bb.nph:
    331   %tmp = sext i32 %0 to i64
    332   %tmp8 = add i64 %tmp, -1
    333   br label %bb
    334 ; CHECK: bb.nph:
    335 ; CHECK:   load double*
    336 ; CHECK:   load double*
    337 ; CHECK:   br label %bb
    338 
    339 
    340 bb:
    341   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
    342   %scevgep = getelementptr double* %G, i64 %indvar
    343   %tmp9 = add i64 %indvar, 2
    344   %scevgep10 = getelementptr double* %G, i64 %tmp9
    345   %tmp11 = add i64 %indvar, 1
    346   %scevgep12 = getelementptr double* %G, i64 %tmp11
    347   %2 = load double* %scevgep12, align 8
    348   %3 = load double* %scevgep10, align 8
    349   %4 = fadd double %2, %3
    350   %5 = load double* %scevgep, align 8
    351   %6 = fadd double %4, %5
    352   store double %6, double* %scevgep12, align 8
    353   %exitcond = icmp eq i64 %tmp11, %tmp8
    354   br i1 %exitcond, label %return, label %bb
    355 
    356 ; Should only be one load in the loop.
    357 ; CHECK: bb:
    358 ; CHECK: load double*
    359 ; CHECK-NOT: load double*
    360 ; CHECK: br i1 %exitcond
    361 
    362 return:
    363   ret void
    364 }
    365 
    366 ; Test critical edge splitting.
    367 define i32 @test11(i32* %p, i1 %C, i32 %N) {
    368 ; CHECK: @test11
    369 block1:
    370         br i1 %C, label %block2, label %block3
    371 
    372 block2:
    373  %cond = icmp sgt i32 %N, 1
    374  br i1 %cond, label %block4, label %block5
    375 ; CHECK: load i32* %p
    376 ; CHECK-NEXT: br label %block4
    377 
    378 block3:
    379   store i32 0, i32* %p
    380   br label %block4
    381 
    382 block4:
    383   %PRE = load i32* %p
    384   br label %block5
    385 
    386 block5:
    387   %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
    388   ret i32 %ret
    389 ; CHECK: block4:
    390 ; CHECK-NEXT: phi i32
    391 }
    392