Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 < %s -block-placement-exit-block-bias=20 -no-phi-elim-live-out-early-exit | FileCheck %s
      ; Module target configuration: the datalayout string selects little-endian
      ; ('e') with 64-bit pointers ('p:64:64:64'); the triple selects x86-64 macOS.
      2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      3 target triple = "x86_64-apple-macosx10.9.0"
      4 
      5 ; This is longest_match, the hot function from zlib's deflate implementation.
      6 
      ; Type declarations used by @longest_match. The function only indexes
      ; %struct.internal_state directly (via constant getelementptr field indices);
      ; the other types are referenced, directly or transitively, by that struct's
      ; field list. Field order matters: the GEP indices in the function body refer
      ; to positions in this list.
      7 %struct.internal_state = type { %struct.z_stream_s*, i32, i8*, i64, i8*, i32, i32, %struct.gz_header_s*, i32, i8, i32, i32, i32, i32, i8*, i64, i16*, i16*, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i64, i64, i32, i32, i16, i32, i64 }
      8 %struct.z_stream_s = type { i8*, i32, i64, i8*, i32, i64, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i64, i64 }
      9 %struct.gz_header_s = type { i32, i64, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
     10 %struct.ct_data_s = type { %union.anon, %union.anon.0 }
     11 %union.anon = type { i16 }
     12 %union.anon.0 = type { i16 }
     13 %struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc_s* }
     14 %struct.static_tree_desc_s = type { i32 }
     15 
     16 ; CHECK-LABEL: longest_match:
     17 
     18 ; Verify that there are no spills or reloads in the loop exit block. This loop
     19 ; is mostly cold; only %do.cond125 and %land.rhs131 are hot.
     20 ; CHECK: %do.cond125
     21 ; CHECK-NOT: {{Spill|Reload}}
     22 ; CHECK: jbe
     23 
     24 ; Verify that block placement doesn't destroy source order. It's important that
     25 ; the two hot blocks are laid out close to each other.
     26 ; CHECK-NEXT: %land.rhs131
     27 ; CHECK: jne
     ; @longest_match: walks a chain of candidate match positions and returns the
     ; best match length found, clamped (unsigned) to the value loaded from field 29
     ; of %s (%11).
     ;   %s         - state struct; its fields are read through constant-index GEPs,
     ;                and field 28 (%match_start) is stored to whenever a longer
     ;                match is found.
     ;   %cur_match - first candidate position, used as an offset into buffer %1.
     ; The FileCheck lines preceding this function match the block names
     ; %do.cond125 and %land.rhs131 and the code generated for this exact
     ; instruction sequence, so the body must not be renamed or reordered.
     28 define i32 @longest_match(%struct.internal_state* nocapture %s, i32 %cur_match) nounwind {
     29 entry:
        ; Loop-invariant setup: load the state fields and derive the scan pointer
        ; (%add.ptr = %1 + zext(%2)), the end pointer 258 bytes past it
        ; (%add.ptr12), the lower bound for chain positions (%sub6.), the initial
        ; chain budget (%chain_length.0) and the early-exit length (%.).
     30   %max_chain_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 31
     31   %0 = load i32, i32* %max_chain_length, align 4
     32   %window = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 14
     33   %1 = load i8*, i8** %window, align 8
     34   %strstart = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 27
     35   %2 = load i32, i32* %strstart, align 4
     36   %idx.ext = zext i32 %2 to i64
     37   %add.ptr = getelementptr inbounds i8, i8* %1, i64 %idx.ext
     38   %prev_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 30
     39   %3 = load i32, i32* %prev_length, align 4
     40   %nice_match1 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 36
     41   %4 = load i32, i32* %nice_match1, align 4
     42   %w_size = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 11
     43   %5 = load i32, i32* %w_size, align 4
        ; %sub6. = (%2 >u %5 - 262) ? %2 - (%5 - 262) : 0
     44   %sub = add i32 %5, -262
     45   %cmp = icmp ugt i32 %2, %sub
     46   %sub6 = sub i32 %2, %sub
     47   %sub6. = select i1 %cmp, i32 %sub6, i32 0
     48   %prev7 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 16
     49   %6 = load i16*, i16** %prev7, align 8
     50   %w_mask = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 13
     51   %7 = load i32, i32* %w_mask, align 4
     52   %add.ptr11.sum = add i64 %idx.ext, 258
     53   %add.ptr12 = getelementptr inbounds i8, i8* %1, i64 %add.ptr11.sum
        ; %8 and %9: the bytes at offsets %3-1 and %3 from the scan pointer; they
        ; seed the %scan_end1.0 / %scan_end.0 phis in the loop header.
     54   %sub13 = add nsw i32 %3, -1
     55   %idxprom = sext i32 %sub13 to i64
     56   %add.ptr.sum = add i64 %idxprom, %idx.ext
     57   %arrayidx = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum
     58   %8 = load i8, i8* %arrayidx, align 1
     59   %idxprom14 = sext i32 %3 to i64
     60   %add.ptr.sum213 = add i64 %idxprom14, %idx.ext
     61   %arrayidx15 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum213
     62   %9 = load i8, i8* %arrayidx15, align 1
        ; Chain budget: the full %0 when %3 <u %10, otherwise %0 >> 2.
     63   %good_match = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 35
     64   %10 = load i32, i32* %good_match, align 4
     65   %cmp17 = icmp ult i32 %3, %10
     66   %shr = lshr i32 %0, 2
     67   %chain_length.0 = select i1 %cmp17, i32 %0, i32 %shr
        ; %. = unsigned min(%4, %11).
     68   %lookahead = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 29
     69   %11 = load i32, i32* %lookahead, align 4
     70   %cmp18 = icmp ugt i32 %4, %11
     71   %. = select i1 %cmp18, i32 %11, i32 %4
     72   %match_start = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 28
     73   %add.ptr.sum217 = add i64 %idx.ext, 1
     74   %arrayidx44 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum217
     75   %add.ptr.sum218 = add i64 %idx.ext, 2
     76   %add.ptr50 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum218
     77   %sub.ptr.lhs.cast = ptrtoint i8* %add.ptr12 to i64
     78   br label %do.body
     79 
        ; Loop header. The candidate pointer is %add.ptr24 = %1 + zext(%cur_match.addr.0).
        ; This block and the next three reject the candidate (branching to
        ; %do.cond125) unless its bytes at offsets %best_len.0, %best_len.0-1, 0 and 1
        ; equal the cached end bytes and the first two scan bytes respectively.
     80 do.body:                                          ; preds = %land.rhs131, %entry
     81   %best_len.0 = phi i32 [ %best_len.1, %land.rhs131 ], [ %3, %entry ]
     82   %chain_length.1 = phi i32 [ %dec, %land.rhs131 ], [ %chain_length.0, %entry ]
     83   %cur_match.addr.0 = phi i32 [ %conv128, %land.rhs131 ], [ %cur_match, %entry ]
     84   %scan_end1.0 = phi i8 [ %scan_end1.1, %land.rhs131 ], [ %8, %entry ]
     85   %scan_end.0 = phi i8 [ %scan_end.1, %land.rhs131 ], [ %9, %entry ]
     86   %idx.ext23 = zext i32 %cur_match.addr.0 to i64
     87   %add.ptr24 = getelementptr inbounds i8, i8* %1, i64 %idx.ext23
     88   %idxprom25 = sext i32 %best_len.0 to i64
     89   %add.ptr24.sum = add i64 %idx.ext23, %idxprom25
     90   %arrayidx26 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum
     91   %12 = load i8, i8* %arrayidx26, align 1
     92   %cmp28 = icmp eq i8 %12, %scan_end.0
     93   br i1 %cmp28, label %lor.lhs.false, label %do.cond125
     94 
     95 lor.lhs.false:                                    ; preds = %do.body
     96   %sub30 = add nsw i32 %best_len.0, -1
     97   %idxprom31 = sext i32 %sub30 to i64
     98   %add.ptr24.sum214 = add i64 %idx.ext23, %idxprom31
     99   %arrayidx32 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum214
    100   %13 = load i8, i8* %arrayidx32, align 1
    101   %cmp35 = icmp eq i8 %13, %scan_end1.0
    102   br i1 %cmp35, label %lor.lhs.false37, label %do.cond125
    103 
    104 lor.lhs.false37:                                  ; preds = %lor.lhs.false
    105   %14 = load i8, i8* %add.ptr24, align 1
    106   %15 = load i8, i8* %add.ptr, align 1
    107   %cmp40 = icmp eq i8 %14, %15
    108   br i1 %cmp40, label %lor.lhs.false42, label %do.cond125
    109 
    110 lor.lhs.false42:                                  ; preds = %lor.lhs.false37
    111   %add.ptr24.sum215 = add i64 %idx.ext23, 1
    112   %incdec.ptr = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum215
    113   %16 = load i8, i8* %incdec.ptr, align 1
    114   %17 = load i8, i8* %arrayidx44, align 1
    115   %cmp46 = icmp eq i8 %16, %17
    116   br i1 %cmp46, label %if.end49, label %do.cond125
    117 
        ; All four quick checks passed: start the byte-by-byte comparison at offset 2
        ; of both the candidate (%incdec.ptr51) and the scan buffer (%add.ptr50).
    118 if.end49:                                         ; preds = %lor.lhs.false42
    119   %incdec.ptr.sum = add i64 %idx.ext23, 2
    120   %incdec.ptr51 = getelementptr inbounds i8, i8* %1, i64 %incdec.ptr.sum
    121   br label %do.cond
    122 
        ; Inner compare loop: %do.cond through %land.lhs.true100 compare eight byte
        ; pairs per iteration (offsets 1..8 from %scan.1 and %match.0). Any mismatch
        ; leaves to %do.end; the back edge is taken only when the eighth pair matches
        ; and %incdec.ptr101 is still below the end pointer %add.ptr12.
    123 do.cond:                                          ; preds = %land.lhs.true100, %if.end49
    124   %match.0 = phi i8* [ %incdec.ptr51, %if.end49 ], [ %incdec.ptr103, %land.lhs.true100 ]
    125   %scan.1 = phi i8* [ %add.ptr50, %if.end49 ], [ %incdec.ptr101, %land.lhs.true100 ]
    126   %incdec.ptr53 = getelementptr inbounds i8, i8* %scan.1, i64 1
    127   %18 = load i8, i8* %incdec.ptr53, align 1
    128   %incdec.ptr55 = getelementptr inbounds i8, i8* %match.0, i64 1
    129   %19 = load i8, i8* %incdec.ptr55, align 1
    130   %cmp57 = icmp eq i8 %18, %19
    131   br i1 %cmp57, label %land.lhs.true, label %do.end
    132 
    133 land.lhs.true:                                    ; preds = %do.cond
    134   %incdec.ptr59 = getelementptr inbounds i8, i8* %scan.1, i64 2
    135   %20 = load i8, i8* %incdec.ptr59, align 1
    136   %incdec.ptr61 = getelementptr inbounds i8, i8* %match.0, i64 2
    137   %21 = load i8, i8* %incdec.ptr61, align 1
    138   %cmp63 = icmp eq i8 %20, %21
    139   br i1 %cmp63, label %land.lhs.true65, label %do.end
    140 
    141 land.lhs.true65:                                  ; preds = %land.lhs.true
    142   %incdec.ptr66 = getelementptr inbounds i8, i8* %scan.1, i64 3
    143   %22 = load i8, i8* %incdec.ptr66, align 1
    144   %incdec.ptr68 = getelementptr inbounds i8, i8* %match.0, i64 3
    145   %23 = load i8, i8* %incdec.ptr68, align 1
    146   %cmp70 = icmp eq i8 %22, %23
    147   br i1 %cmp70, label %land.lhs.true72, label %do.end
    148 
    149 land.lhs.true72:                                  ; preds = %land.lhs.true65
    150   %incdec.ptr73 = getelementptr inbounds i8, i8* %scan.1, i64 4
    151   %24 = load i8, i8* %incdec.ptr73, align 1
    152   %incdec.ptr75 = getelementptr inbounds i8, i8* %match.0, i64 4
    153   %25 = load i8, i8* %incdec.ptr75, align 1
    154   %cmp77 = icmp eq i8 %24, %25
    155   br i1 %cmp77, label %land.lhs.true79, label %do.end
    156 
    157 land.lhs.true79:                                  ; preds = %land.lhs.true72
    158   %incdec.ptr80 = getelementptr inbounds i8, i8* %scan.1, i64 5
    159   %26 = load i8, i8* %incdec.ptr80, align 1
    160   %incdec.ptr82 = getelementptr inbounds i8, i8* %match.0, i64 5
    161   %27 = load i8, i8* %incdec.ptr82, align 1
    162   %cmp84 = icmp eq i8 %26, %27
    163   br i1 %cmp84, label %land.lhs.true86, label %do.end
    164 
    165 land.lhs.true86:                                  ; preds = %land.lhs.true79
    166   %incdec.ptr87 = getelementptr inbounds i8, i8* %scan.1, i64 6
    167   %28 = load i8, i8* %incdec.ptr87, align 1
    168   %incdec.ptr89 = getelementptr inbounds i8, i8* %match.0, i64 6
    169   %29 = load i8, i8* %incdec.ptr89, align 1
    170   %cmp91 = icmp eq i8 %28, %29
    171   br i1 %cmp91, label %land.lhs.true93, label %do.end
    172 
    173 land.lhs.true93:                                  ; preds = %land.lhs.true86
    174   %incdec.ptr94 = getelementptr inbounds i8, i8* %scan.1, i64 7
    175   %30 = load i8, i8* %incdec.ptr94, align 1
    176   %incdec.ptr96 = getelementptr inbounds i8, i8* %match.0, i64 7
    177   %31 = load i8, i8* %incdec.ptr96, align 1
    178   %cmp98 = icmp eq i8 %30, %31
    179   br i1 %cmp98, label %land.lhs.true100, label %do.end
    180 
    181 land.lhs.true100:                                 ; preds = %land.lhs.true93
    182   %incdec.ptr101 = getelementptr inbounds i8, i8* %scan.1, i64 8
    183   %32 = load i8, i8* %incdec.ptr101, align 1
    184   %incdec.ptr103 = getelementptr inbounds i8, i8* %match.0, i64 8
    185   %33 = load i8, i8* %incdec.ptr103, align 1
    186   %cmp105 = icmp eq i8 %32, %33
    187   %cmp107 = icmp ult i8* %incdec.ptr101, %add.ptr12
    188   %or.cond = and i1 %cmp105, %cmp107
    189   br i1 %or.cond, label %do.cond, label %do.end
    190 
        ; %scan.2 is the scan pointer at which the comparison stopped.
        ; %sub110 = 258 - (%add.ptr12 - %scan.2), i.e. %scan.2's byte offset from
        ; %add.ptr; it is compared (signed) against the current best length.
    191 do.end:                                           ; preds = %land.lhs.true100, %land.lhs.true93, %land.lhs.true86, %land.lhs.true79, %land.lhs.true72, %land.lhs.true65, %land.lhs.true, %do.cond
    192   %scan.2 = phi i8* [ %incdec.ptr101, %land.lhs.true100 ], [ %incdec.ptr94, %land.lhs.true93 ], [ %incdec.ptr87, %land.lhs.true86 ], [ %incdec.ptr80, %land.lhs.true79 ], [ %incdec.ptr73, %land.lhs.true72 ], [ %incdec.ptr66, %land.lhs.true65 ], [ %incdec.ptr59, %land.lhs.true ], [ %incdec.ptr53, %do.cond ]
    193   %sub.ptr.rhs.cast = ptrtoint i8* %scan.2 to i64
    194   %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
    195   %conv109 = trunc i64 %sub.ptr.sub to i32
    196   %sub110 = sub nsw i32 258, %conv109
    197   %cmp112 = icmp sgt i32 %sub110, %best_len.0
    198   br i1 %cmp112, label %if.then114, label %do.cond125
    199 
        ; New best length: record the candidate position in field 28 of %s, then
        ; leave the loop unless the length is still (signed) below %. .
    200 if.then114:                                       ; preds = %do.end
    201   store i32 %cur_match.addr.0, i32* %match_start, align 4
    202   %cmp115 = icmp slt i32 %sub110, %.
    203   br i1 %cmp115, label %if.end118, label %do.end135
    204 
        ; Reload the two cached end bytes for the new best length: the scan bytes at
        ; offsets %sub110-1 (%34) and %sub110 (%35).
    205 if.end118:                                        ; preds = %if.then114
    206   %sub119 = add nsw i32 %sub110, -1
    207   %idxprom120 = sext i32 %sub119 to i64
    208   %add.ptr111.sum = add i64 %idxprom120, %idx.ext
    209   %arrayidx121 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum
    210   %34 = load i8, i8* %arrayidx121, align 1
    211   %idxprom122 = sext i32 %sub110 to i64
    212   %add.ptr111.sum216 = add i64 %idxprom122, %idx.ext
    213   %arrayidx123 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum216
    214   %35 = load i8, i8* %arrayidx123, align 1
    215   br label %do.cond125
    216 
        ; First of the two hot blocks named by the FileCheck comments. Follows the
        ; chain: %conv128 = zext(%6[%cur_match.addr.0 & %7]); the loop continues only
        ; while that next position is (unsigned) above the lower bound %sub6. .
    217 do.cond125:                                       ; preds = %if.end118, %do.end, %lor.lhs.false42, %lor.lhs.false37, %lor.lhs.false, %do.body
    218   %best_len.1 = phi i32 [ %best_len.0, %do.body ], [ %best_len.0, %lor.lhs.false ], [ %best_len.0, %lor.lhs.false37 ], [ %best_len.0, %lor.lhs.false42 ], [ %sub110, %if.end118 ], [ %best_len.0, %do.end ]
    219   %scan_end1.1 = phi i8 [ %scan_end1.0, %do.body ], [ %scan_end1.0, %lor.lhs.false ], [ %scan_end1.0, %lor.lhs.false37 ], [ %scan_end1.0, %lor.lhs.false42 ], [ %34, %if.end118 ], [ %scan_end1.0, %do.end ]
    220   %scan_end.1 = phi i8 [ %scan_end.0, %do.body ], [ %scan_end.0, %lor.lhs.false ], [ %scan_end.0, %lor.lhs.false37 ], [ %scan_end.0, %lor.lhs.false42 ], [ %35, %if.end118 ], [ %scan_end.0, %do.end ]
    221   %and = and i32 %cur_match.addr.0, %7
    222   %idxprom126 = zext i32 %and to i64
    223   %arrayidx127 = getelementptr inbounds i16, i16* %6, i64 %idxprom126
    224   %36 = load i16, i16* %arrayidx127, align 2
    225   %conv128 = zext i16 %36 to i32
    226   %cmp129 = icmp ugt i32 %conv128, %sub6.
    227   br i1 %cmp129, label %land.rhs131, label %do.end135
    228 
        ; Second hot block: decrement the chain budget and take the back edge to
        ; %do.body unless the budget reached zero.
    229 land.rhs131:                                      ; preds = %do.cond125
    230   %dec = add i32 %chain_length.1, -1
    231   %cmp132 = icmp eq i32 %dec, 0
    232   br i1 %cmp132, label %do.end135, label %do.body
    233 
        ; Common exit: return unsigned min(%best_len.2, %11).
    234 do.end135:                                        ; preds = %land.rhs131, %do.cond125, %if.then114
    235   %best_len.2 = phi i32 [ %best_len.1, %land.rhs131 ], [ %best_len.1, %do.cond125 ], [ %sub110, %if.then114 ]
    236   %cmp137 = icmp ugt i32 %best_len.2, %11
    237   %.best_len.2 = select i1 %cmp137, i32 %11, i32 %best_len.2
    238   ret i32 %.best_len.2
    239 }
    240