Home | History | Annotate | Download | only in LICM
      1 ; RUN: opt -S -loop-sink < %s | FileCheck %s
      2 ; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
      3 
      4 @g = global i32 0, align 4 ; mutable global; the %invariant load in t1, t2, t3, t4, t5 and t6 reads it
      5 
      6 ;     b1
      7 ;    /  \
      8 ;   b2  b6
      9 ;  /  \  |
     10 ; b3  b4 |
     11 ;  \  /  |
     12 ;   b5   |
     13 ;    \  /
     14 ;     b7
     15 ; preheader: 1000
     16 ; b2: 15
     17 ; b3: 7
     18 ; b4: 7
     19 ; Sink load to b2
     20 ; CHECK: t1
     21 ; CHECK: .b2:
     22 ; CHECK: load i32, i32* @g
     23 ; CHECK: .b3:
     24 ; CHECK-NOT:  load i32, i32* @g
     25 define i32 @t1(i32, i32) #0 !prof !0 { ; %invariant is loaded in .preheader and used only in .b3 and .b4
     26   %3 = icmp eq i32 %1, 0
     27   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
     28 
     29 .preheader:
     30   %invariant = load i32, i32* @g ; the load under test
     31   br label %.b1
     32 
     33 .b1:
     34   %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
     35   %c1 = icmp sgt i32 %iv, %0
     36   br i1 %c1, label %.b2, label %.b6, !prof !1 ; !1 weights: .b2 = 1, .b6 = 2000
     37 
     38 .b2:
     39   %c2 = icmp sgt i32 %iv, 1
     40   br i1 %c2, label %.b3, label %.b4 ; no branch weights on this split
     41 
     42 .b3:
     43   %t3 = sub nsw i32 %invariant, %iv ; first use of %invariant
     44   br label %.b5
     45 
     46 .b4:
     47   %t4 = add nsw i32 %invariant, %iv ; second use of %invariant
     48   br label %.b5
     49 
     50 .b5:
     51   %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
     52   %t5 = mul nsw i32 %p5, 5
     53   br label %.b7
     54 
     55 .b6:
     56   %t6 = add nsw i32 %iv, 100 ; this path does not use %invariant
     57   br label %.b7
     58 
     59 .b7:
     60   %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
     61   %t7 = add nuw nsw i32 %iv, 1
     62   %c7 = icmp eq i32 %t7, %p7
     63   br i1 %c7, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
     64 
     65 .exit:
     66   ret i32 10
     67 }
     68 
     69 ;     b1
     70 ;    /  \
     71 ;   b2  b6
     72 ;  /  \  |
     73 ; b3  b4 |
     74 ;  \  /  |
     75 ;   b5   |
     76 ;    \  /
     77 ;     b7
     78 ; preheader: 500
     79 ; b1: 16016
     80 ; b3: 8
     81 ; b6: 8
     82 ; Sink load to b3 and b6
     83 ; CHECK: t2
     84 ; CHECK: .preheader:
     85 ; CHECK-NOT: load i32, i32* @g
     86 ; CHECK: .b3:
     87 ; CHECK: load i32, i32* @g
     88 ; CHECK: .b4:
     89 ; CHECK: .b6:
     90 ; CHECK: load i32, i32* @g
     91 ; CHECK: .b7:
     92 define i32 @t2(i32, i32) #0 !prof !0 { ; %invariant is loaded in .preheader and used only in .b3 and .b6
     93   %3 = icmp eq i32 %1, 0
     94   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
     95 
     96 .preheader:
     97   %invariant = load i32, i32* @g ; the load under test
     98   br label %.b1
     99 
    100 .b1:
    101   %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
    102   %c1 = icmp sgt i32 %iv, %0
    103   br i1 %c1, label %.b2, label %.b6, !prof !2 ; !2 weights: .b2 = 2000, .b6 = 1
    104 
    105 .b2:
    106   %c2 = icmp sgt i32 %iv, 1
    107   br i1 %c2, label %.b3, label %.b4, !prof !1 ; !1 weights: .b3 = 1, .b4 = 2000
    108 
    109 .b3:
    110   %t3 = sub nsw i32 %invariant, %iv ; use of %invariant on the low-weight side of .b2
    111   br label %.b5
    112 
    113 .b4:
    114   %t4 = add nsw i32 5, %iv ; the high-weight side of .b2 does not use %invariant
    115   br label %.b5
    116 
    117 .b5:
    118   %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
    119   %t5 = mul nsw i32 %p5, 5
    120   br label %.b7
    121 
    122 .b6:
    123   %t6 = add nsw i32 %iv, %invariant ; use of %invariant on the low-weight side of .b1
    124   br label %.b7
    125 
    126 .b7:
    127   %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
    128   %t7 = add nuw nsw i32 %iv, 1
    129   %c7 = icmp eq i32 %t7, %p7
    130   br i1 %c7, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
    131 
    132 .exit:
    133   ret i32 10
    134 }
    135 
    136 ;     b1
    137 ;    /  \
    138 ;   b2  b6
    139 ;  /  \  |
    140 ; b3  b4 |
    141 ;  \  /  |
    142 ;   b5   |
    143 ;    \  /
    144 ;     b7
    145 ; preheader: 500
    146 ; b3: 8
    147 ; b5: 16008
    148 ; Do not sink load from preheader.
    149 ; CHECK: t3
    150 ; CHECK: .preheader:
    151 ; CHECK: load i32, i32* @g
    152 ; CHECK: .b1:
    153 ; CHECK-NOT: load i32, i32* @g
    154 define i32 @t3(i32, i32) #0 !prof !0 { ; %invariant is loaded in .preheader and used in .b3 and .b5
    155   %3 = icmp eq i32 %1, 0
    156   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
    157 
    158 .preheader:
    159   %invariant = load i32, i32* @g ; the load under test
    160   br label %.b1
    161 
    162 .b1:
    163   %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
    164   %c1 = icmp sgt i32 %iv, %0
    165   br i1 %c1, label %.b2, label %.b6, !prof !2 ; !2 weights: .b2 = 2000, .b6 = 1
    166 
    167 .b2:
    168   %c2 = icmp sgt i32 %iv, 1
    169   br i1 %c2, label %.b3, label %.b4, !prof !1 ; !1 weights: .b3 = 1, .b4 = 2000
    170 
    171 .b3:
    172   %t3 = sub nsw i32 %invariant, %iv ; use of %invariant on the low-weight side of .b2
    173   br label %.b5
    174 
    175 .b4:
    176   %t4 = add nsw i32 5, %iv
    177   br label %.b5
    178 
    179 .b5:
    180   %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
    181   %t5 = mul nsw i32 %p5, %invariant ; use of %invariant in the join block reached from both .b3 and .b4
    182   br label %.b7
    183 
    184 .b6:
    185   %t6 = add nsw i32 %iv, 5 ; this path does not use %invariant
    186   br label %.b7
    187 
    188 .b7:
    189   %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
    190   %t7 = add nuw nsw i32 %iv, 1
    191   %c7 = icmp eq i32 %t7, %p7
    192   br i1 %c7, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
    193 
    194 .exit:
    195   ret i32 10
    196 }
    197 
    198 ; For single-BB loop with <=1 avg trip count, sink load to b1
    199 ; CHECK: t4
    200 ; CHECK: .preheader:
    201 ; CHECK-NOT: load i32, i32* @g
    202 ; CHECK: .b1:
    203 ; CHECK: load i32, i32* @g
    204 ; CHECK: .exit:
    205 define i32 @t4(i32, i32) #0 !prof !0 { ; single-block loop; .preheader is the entry block (no guard branch)
    206 .preheader:
    207   %invariant = load i32, i32* @g ; the load under test
    208   br label %.b1
    209 
    210 .b1:
    211   %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
    212   %t1 = add nsw i32 %invariant, %iv ; only use of %invariant; %t1 also feeds the %iv phi
    213   %c1 = icmp sgt i32 %iv, %0
    214   br i1 %c1, label %.b1, label %.exit, !prof !1 ; !1 weights: back to .b1 = 1, .exit = 2000
    215 
    216 .exit:
    217   ret i32 10
    218 }
    219 
    220 ;     b1
    221 ;    /  \
    222 ;   b2  b6
    223 ;  /  \  |
    224 ; b3  b4 |
    225 ;  \  /  |
    226 ;   b5   |
    227 ;    \  /
    228 ;     b7
    229 ; preheader: 1000
    230 ; b2: 15
    231 ; b3: 7
    232 ; b4: 7
    233 ; The call to @foo in the loop may write to @g (aliasing store), so do not sink the load.
    234 ; CHECK: t5
    235 ; CHECK: .preheader:
    236 ; CHECK: load i32, i32* @g
    237 ; CHECK: .b1:
    238 ; CHECK-NOT: load i32, i32* @g
    239 define i32 @t5(i32, i32*) #0 !prof !0 { ; same CFG shape as t1, but .b6 calls @foo; the i32* argument is unused
    240   %3 = icmp eq i32 %0, 0
    241   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the first argument is 0
    242 
    243 .preheader:
    244   %invariant = load i32, i32* @g ; the load under test, from the mutable global
    245   br label %.b1
    246 
    247 .b1:
    248   %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
    249   %c1 = icmp sgt i32 %iv, %0
    250   br i1 %c1, label %.b2, label %.b6, !prof !1 ; !1 weights: .b2 = 1, .b6 = 2000
    251 
    252 .b2:
    253   %c2 = icmp sgt i32 %iv, 1
    254   br i1 %c2, label %.b3, label %.b4 ; no branch weights on this split
    255 
    256 .b3:
    257   %t3 = sub nsw i32 %invariant, %iv ; first use of %invariant
    258   br label %.b5
    259 
    260 .b4:
    261   %t4 = add nsw i32 %invariant, %iv ; second use of %invariant
    262   br label %.b5
    263 
    264 .b5:
    265   %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
    266   %t5 = mul nsw i32 %p5, 5
    267   br label %.b7
    268 
    269 .b6:
    270   %t6 = call i32 @foo() ; @foo is declared with no attributes, so this call may write memory
    271   br label %.b7
    272 
    273 .b7:
    274   %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
    275   %t7 = add nuw nsw i32 %iv, 1
    276   %c7 = icmp eq i32 %t7, %p7
    277   br i1 %c7, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
    278 
    279 .exit:
    280   ret i32 10
    281 }
    282 
    283 ;     b1
    284 ;    /  \
    285 ;   b2  b6
    286 ;  /  \  |
    287 ; b3  b4 |
    288 ;  \  /  |
    289 ;   b5   |
    290 ;    \  /
    291 ;     b7
    292 ; preheader: 1000
    293 ; b2: 15
    294 ; b3: 7
    295 ; b4: 7
    296 ; Regardless of aliasing store in loop this load from constant memory can be sunk.
    297 ; CHECK: t5_const_memory
    298 ; CHECK: .preheader:
    299 ; CHECK-NOT: load i32, i32* @g_const
    300 ; CHECK: .b2:
    301 ; CHECK: load i32, i32* @g_const
    302 ; CHECK: br i1 %c2, label %.b3, label %.b4
    303 define i32 @t5_const_memory(i32, i32*) #0 !prof !0 { ; same body as t5, but the load reads @g_const instead of @g
    304   %3 = icmp eq i32 %0, 0
    305   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the first argument is 0
    306 
    307 .preheader:
    308   %invariant = load i32, i32* @g_const ; the load under test, from a global declared constant
    309   br label %.b1
    310 
    311 .b1:
    312   %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
    313   %c1 = icmp sgt i32 %iv, %0
    314   br i1 %c1, label %.b2, label %.b6, !prof !1 ; !1 weights: .b2 = 1, .b6 = 2000
    315 
    316 .b2:
    317   %c2 = icmp sgt i32 %iv, 1
    318   br i1 %c2, label %.b3, label %.b4 ; this exact branch text is matched by the test expectations above
    319 
    320 .b3:
    321   %t3 = sub nsw i32 %invariant, %iv ; first use of %invariant
    322   br label %.b5
    323 
    324 .b4:
    325   %t4 = add nsw i32 %invariant, %iv ; second use of %invariant
    326   br label %.b5
    327 
    328 .b5:
    329   %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
    330   %t5 = mul nsw i32 %p5, 5
    331   br label %.b7
    332 
    333 .b6:
    334   %t6 = call i32 @foo() ; @foo is declared with no attributes, so this call may write memory
    335   br label %.b7
    336 
    337 .b7:
    338   %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
    339   %t7 = add nuw nsw i32 %iv, 1
    340   %c7 = icmp eq i32 %t7, %p7
    341   br i1 %c7, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
    342 
    343 .exit:
    344   ret i32 10
    345 }
    346 
    347 ;     b1
    348 ;    /  \
    349 ;   b2  b3
    350 ;    \  /
    351 ;     b4
    352 ; preheader: 1000
    353 ; b2: 15
    354 ; b3: 7
    355 ; Do not sink unordered atomic load to b2
    356 ; CHECK: t6
    357 ; CHECK: .preheader:
    358 ; CHECK:  load atomic i32, i32* @g unordered, align 4
    359 ; CHECK: .b2:
    360 ; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
    361 define i32 @t6(i32, i32) #0 !prof !0 { ; diamond loop; %invariant is an unordered atomic load of @g used only in .b2
    362   %3 = icmp eq i32 %1, 0
    363   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
    364 
    365 .preheader:
    366   %invariant = load atomic i32, i32* @g unordered, align 4 ; the load under test, from the mutable global
    367   br label %.b1
    368 
    369 .b1:
    370   %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
    371   %c1 = icmp sgt i32 %iv, %0
    372   br i1 %c1, label %.b2, label %.b3, !prof !1 ; !1 weights: .b2 = 1, .b3 = 2000
    373 
    374 .b2:
    375   %t1 = add nsw i32 %invariant, %iv ; only use of %invariant
    376   br label %.b4
    377 
    378 .b3:
    379   %t2 = add nsw i32 %iv, 100 ; this path does not use %invariant
    380   br label %.b4
    381 
    382 .b4:
    383   %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
    384   %t3 = add nuw nsw i32 %iv, 1
    385   %c2 = icmp eq i32 %t3, %p1
    386   br i1 %c2, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
    387 
    388 .exit:
    389   ret i32 10
    390 }
    391 
    392 @g_const = constant i32 0, align 4 ; constant global; the %invariant load in t5_const_memory and t7 reads it
    393 
    394 ;     b1
    395 ;    /  \
    396 ;   b2  b3
    397 ;    \  /
    398 ;     b4
    399 ; preheader: 1000
    400 ; b2: 0.5
    401 ; b3: 999.5
    402 ; Sink unordered atomic load to b2. Sinking an unordered load from constant
    403 ; memory into the loop is allowed.
    404 ; CHECK: t7
    405 ; CHECK: .preheader:
    406 ; CHECK-NOT:  load atomic i32, i32* @g_const unordered, align 4
    407 ; CHECK: .b2:
    408 ; CHECK: load atomic i32, i32* @g_const unordered, align 4
    409 define i32 @t7(i32, i32) #0 !prof !0 { ; same body as t6, but the unordered atomic load reads @g_const
    410   %3 = icmp eq i32 %1, 0
    411   br i1 %3, label %.exit, label %.preheader ; bypass the loop when the second argument is 0
    412 
    413 .preheader:
    414   %invariant = load atomic i32, i32* @g_const unordered, align 4 ; the load under test, from a global declared constant
    415   br label %.b1
    416 
    417 .b1:
    418   %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
    419   %c1 = icmp sgt i32 %iv, %0
    420   br i1 %c1, label %.b2, label %.b3, !prof !1 ; !1 weights: .b2 = 1, .b3 = 2000
    421 
    422 .b2:
    423   %t1 = add nsw i32 %invariant, %iv ; only use of %invariant
    424   br label %.b4
    425 
    426 .b3:
    427   %t2 = add nsw i32 %iv, 100 ; this path does not use %invariant
    428   br label %.b4
    429 
    430 .b4:
    431   %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
    432   %t3 = add nuw nsw i32 %iv, 1
    433   %c2 = icmp eq i32 %t3, %p1
    434   br i1 %c2, label %.b1, label %.exit, !prof !3 ; latch; !3 weights: back to .b1 = 100, .exit = 1
    435 
    436 .exit:
    437   ret i32 10
    438 }
    439 
    440 declare i32 @foo() ; external function with no attributes; called from .b6 in t5 and t5_const_memory
    441 
    442 !0 = !{!"function_entry_count", i64 1} ; attached as !prof to every function defined in this file
    443 !1 = !{!"branch_weights", i32 1, i32 2000} ; conditional branch whose first (true) successor has weight 1
    444 !2 = !{!"branch_weights", i32 2000, i32 1} ; conditional branch whose first (true) successor has weight 2000
    445 !3 = !{!"branch_weights", i32 100, i32 1} ; used on the loop-latch branches: back to .b1 = 100, .exit = 1
    446