Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
      2 ;
      3 ; Verify that misched resource/latency balance heuristics are sane.
      4 
      5 define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
      6   i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
      7  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)  ; %pre103 and %pre104 are never referenced in the body
      8   nounwind uwtable ssp {  ; per iteration: tmp55[iv] = sum over k=0..9 of tmp56[k] * col_k[iv], col_k = %pre, %pre94 .. %pre102
      9 entry:
     10   br label %for.body
     11 
     12 ; imull folded loads should be in order and interleaved with addl, never
     13 ; adjacent. Also check that we have no spilling.
     14 ;
     15 ; Since mmult1 IR is already in good order, this effectively ensures
     16 ; the scheduler maintains source order.
     17 ;
     18 ; CHECK: %for.body
     19 ; CHECK-NOT: %rsp
     20 ; CHECK: imull 4
     21 ; CHECK-NOT: {{imull|rsp}}
     22 ; CHECK: addl
     23 ; CHECK: imull 8
     24 ; CHECK-NOT: {{imull|rsp}}
     25 ; CHECK: addl
     26 ; CHECK: imull 12
     27 ; CHECK-NOT: {{imull|rsp}}
     28 ; CHECK: addl
     29 ; CHECK: imull 16
     30 ; CHECK-NOT: {{imull|rsp}}
     31 ; CHECK: addl
     32 ; CHECK: imull 20
     33 ; CHECK-NOT: {{imull|rsp}}
     34 ; CHECK: addl
     35 ; CHECK: imull 24
     36 ; CHECK-NOT: {{imull|rsp}}
     37 ; CHECK: addl
     38 ; CHECK: imull 28
     39 ; CHECK-NOT: {{imull|rsp}}
     40 ; CHECK: addl
     41 ; CHECK: imull 32
     42 ; CHECK-NOT: {{imull|rsp}}
     43 ; CHECK: addl
     44 ; CHECK: imull 36
     45 ; CHECK-NOT: {{imull|rsp}}
     46 ; CHECK: addl
     47 ; CHECK-NOT: {{imull|rsp}}
     48 ; CHECK: %end
     49 for.body:
     50   %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]  ; iv: 0 on entry, iv + 1 on the back edge
     51   %tmp57 = load i32* %tmp56, align 4  ; tmp56[0]
     52   %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i  ; &pre[iv]
     53   %tmp58 = load i32* %arrayidx12.us.i61, align 4  ; pre[iv]
     54   %mul.us.i = mul nsw i32 %tmp58, %tmp57  ; term 0
     55   %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1  ; &tmp56[1]
     56   %tmp59 = load i32* %arrayidx8.us.i.1, align 4  ; tmp56[1]
     57   %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i  ; &pre94[iv]
     58   %tmp60 = load i32* %arrayidx12.us.i61.1, align 4  ; pre94[iv]
     59   %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59  ; term 1
     60   %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i  ; sum of terms 0..1
     61   %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2  ; term 2 operands: tmp56[2], pre95[iv]
     62   %tmp61 = load i32* %arrayidx8.us.i.2, align 4
     63   %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
     64   %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
     65   %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61  ; term 2
     66   %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1  ; sum of terms 0..2
     67   %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3  ; term 3 operands: tmp56[3], pre96[iv]
     68   %tmp63 = load i32* %arrayidx8.us.i.3, align 4
     69   %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
     70   %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
     71   %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63  ; term 3
     72   %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2  ; sum of terms 0..3
     73   %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4  ; term 4 operands: tmp56[4], pre97[iv]
     74   %tmp65 = load i32* %arrayidx8.us.i.4, align 4
     75   %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
     76   %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
     77   %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65  ; term 4
     78   %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3  ; sum of terms 0..4
     79   %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5  ; term 5 operands: tmp56[5], pre98[iv]
     80   %tmp67 = load i32* %arrayidx8.us.i.5, align 4
     81   %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
     82   %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
     83   %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67  ; term 5
     84   %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4  ; sum of terms 0..5
     85   %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6  ; term 6 operands: tmp56[6], pre99[iv]
     86   %tmp69 = load i32* %arrayidx8.us.i.6, align 4
     87   %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
     88   %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
     89   %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69  ; term 6
     90   %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5  ; sum of terms 0..6
     91   %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7  ; term 7 operands: tmp56[7], pre100[iv]
     92   %tmp71 = load i32* %arrayidx8.us.i.7, align 4
     93   %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
     94   %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
     95   %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71  ; term 7
     96   %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6  ; sum of terms 0..7
     97   %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8  ; term 8 operands: tmp56[8], pre101[iv]
     98   %tmp73 = load i32* %arrayidx8.us.i.8, align 4
     99   %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
    100   %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
    101   %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73  ; term 8
    102   %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7  ; sum of terms 0..8
    103   %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9  ; term 9 operands: tmp56[9], pre102[iv]
    104   %tmp75 = load i32* %arrayidx8.us.i.9, align 4
    105   %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
    106   %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
    107   %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75  ; term 9
    108   %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8  ; full 10-term sum
    109   %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i  ; &tmp55[iv]
    110   store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4  ; tmp55[iv] = full sum
    111   %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1  ; next iv
    112   %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32  ; exit test is done on the 32-bit truncation
    113   %exitcond = icmp eq i32 %lftr.wideiv, 10  ; true once the next iv equals 10, so iv covers 0..9
    114   br i1 %exitcond, label %end, label %for.body
    115 
    116 end:
    117   ret void
    118 }
    119 
    120 ; Unlike the above loop, this IR starts out bad and must be
    121 ; rescheduled.
    122 ;
    123 ; CHECK: %for.body
    124 ; CHECK-NOT: %rsp
    125 ; CHECK: imull 4
    126 ; CHECK-NOT: {{imull|rsp}}
    127 ; CHECK: addl
    128 ; CHECK: imull 8
    129 ; CHECK-NOT: {{imull|rsp}}
    130 ; CHECK: addl
    131 ; CHECK: imull 12
    132 ; CHECK-NOT: {{imull|rsp}}
    133 ; CHECK: addl
    134 ; CHECK: imull 16
    135 ; CHECK-NOT: {{imull|rsp}}
    136 ; CHECK: addl
    137 ; CHECK: imull 20
    138 ; CHECK-NOT: {{imull|rsp}}
    139 ; CHECK: addl
    140 ; CHECK: imull 24
    141 ; CHECK-NOT: {{imull|rsp}}
    142 ; CHECK: addl
    143 ; CHECK: imull 28
    144 ; CHECK-NOT: {{imull|rsp}}
    145 ; CHECK: addl
    146 ; CHECK: imull 32
    147 ; CHECK-NOT: {{imull|rsp}}
    148 ; CHECK: addl
    149 ; CHECK: imull 36
    150 ; CHECK-NOT: {{imull|rsp}}
    151 ; CHECK: addl
    152 ; CHECK-NOT: {{imull|rsp}}
    153 ; CHECK: %end
    154 define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
    155   i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
    156   i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)  ; %pre103 and %pre104 are never referenced in the body
    157   nounwind uwtable ssp {  ; per iteration: tmp55[iv] = sum over k=0..9 of tmp56[k] * col_k[iv]; here the loads are listed ahead of the arithmetic
    158 entry:
    159   br label %for.body
    160 for.body:
    161   %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]  ; iv: 0 on entry, iv + 1 on the back edge
    162   %tmp57 = load i32* %tmp56, align 4  ; tmp56[0]
    163   %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i  ; &pre[iv]
    164   %tmp58 = load i32* %arrayidx12.us.i61, align 4  ; pre[iv]
    165   %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1  ; term 1 operands: tmp56[1], pre94[iv]
    166   %tmp59 = load i32* %arrayidx8.us.i.1, align 4
    167   %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
    168   %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
    169   %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2  ; term 2 operands: tmp56[2], pre95[iv]
    170   %tmp61 = load i32* %arrayidx8.us.i.2, align 4
    171   %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
    172   %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
    173   %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3  ; term 3 operands: tmp56[3], pre96[iv]
    174   %tmp63 = load i32* %arrayidx8.us.i.3, align 4
    175   %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
    176   %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
    177   %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4  ; term 4 operands: tmp56[4], pre97[iv]
    178   %tmp65 = load i32* %arrayidx8.us.i.4, align 4
    179   %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
    180   %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
    181   %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5  ; term 5 operands: tmp56[5], pre98[iv]
    182   %tmp67 = load i32* %arrayidx8.us.i.5, align 4
    183   %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
    184   %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
    185   %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6  ; term 6 operands: tmp56[6], pre99[iv]
    186   %tmp69 = load i32* %arrayidx8.us.i.6, align 4
    187   %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
    188   %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
    189   %mul.us.i = mul nsw i32 %tmp58, %tmp57  ; term 0; the only multiply placed in between the loads
    190   %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7  ; term 7 operands: tmp56[7], pre100[iv]
    191   %tmp71 = load i32* %arrayidx8.us.i.7, align 4
    192   %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
    193   %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
    194   %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8  ; term 8 operands: tmp56[8], pre101[iv]
    195   %tmp73 = load i32* %arrayidx8.us.i.8, align 4
    196   %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
    197   %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
    198   %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9  ; term 9 operands: tmp56[9], pre102[iv]
    199   %tmp75 = load i32* %arrayidx8.us.i.9, align 4
    200   %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
    201   %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
    202   %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59  ; term 1; from here on, one multiply followed by one add per term
    203   %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i  ; sum of terms 0..1
    204   %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61  ; term 2
    205   %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1  ; sum of terms 0..2
    206   %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63  ; term 3
    207   %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2  ; sum of terms 0..3
    208   %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65  ; term 4
    209   %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3  ; sum of terms 0..4
    210   %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67  ; term 5
    211   %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4  ; sum of terms 0..5
    212   %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69  ; term 6
    213   %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5  ; sum of terms 0..6
    214   %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71  ; term 7
    215   %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6  ; sum of terms 0..7
    216   %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73  ; term 8
    217   %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7  ; sum of terms 0..8
    218   %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75  ; term 9
    219   %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8  ; full 10-term sum
    220   %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i  ; &tmp55[iv]
    221   store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4  ; tmp55[iv] = full sum
    222   %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1  ; next iv
    223   %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32  ; exit test is done on the 32-bit truncation
    224   %exitcond = icmp eq i32 %lftr.wideiv, 10  ; true once the next iv equals 10, so iv covers 0..9
    225   br i1 %exitcond, label %end, label %for.body
    226 
    227 end:
    228   ret void
    229 }
    230 
    231 ; A mildly interesting little block extracted from a cipher.  The
    232 ; balanced heuristics are interesting here because we have resource,
    233 ; latency, and register limits all at once. For now, simply check that
    234 ; we don't use any callee-saves.
    235 ; CHECK: @encpc1
    236 ; CHECK: %entry
    237 ; CHECK-NOT: push
    238 ; CHECK-NOT: pop
    239 ; CHECK: ret
    240 @a = external global i32, align 4  ; the four i32 globals read and written by @encpc1; declared here, defined elsewhere
    241 @b = external global i32, align 4
    242 @c = external global i32, align 4
    243 @d = external global i32, align 4
    244 define i32 @encpc1() nounwind {  ; shuffles values between @a..@d, then returns 0 or a sum of shifted copies of the first @a load
    245 entry:
    246   %l1 = load i32* @a, align 16  ; NOTE(review): load states align 16 while @a is declared align 4 -- confirm this is intentional
    247   %conv = shl i32 %l1, 8
    248   %s5 = lshr i32 %l1, 8
    249   %add = or i32 %conv, %s5  ; (l1 << 8) | (l1 >> 8)
    250   store i32 %add, i32* @b
    251   %l6 = load i32* @a  ; second load of @a, after the store to @b
    252   %l7 = load i32* @c
    253   %add.i = add i32 %l7, %l6  ; c + a; later stored to @c and tested against zero
    254   %idxprom.i = zext i32 %l7 to i64  ; value loaded from @c, zero-extended for use as an index
    255   %arrayidx.i = getelementptr inbounds i32* @d, i64 %idxprom.i  ; address of the i32 at index %l7 counting from @d
    256   %l8 = load i32* %arrayidx.i
    257   store i32 346, i32* @c
    258   store i32 20021, i32* @d
    259   %l9 = load i32* @a  ; third load of @a, after the constant stores to @c and @d
    260   store i32 %l8, i32* @a
    261   store i32 %l9, i32* @b
    262   store i32 %add.i, i32* @c
    263   store i32 %l9, i32* @d
    264   %cmp.i = icmp eq i32 %add.i, 0
    265   %s10 = lshr i32 %l1, 16  ; these three shifts are only used in %if but are computed here in %entry
    266   %s12 = lshr i32 %l1, 24
    267   %s14 = lshr i32 %l1, 30
    268   br i1 %cmp.i, label %if, label %return  ; take %if only when %add.i is zero
    269 if:
    270   %sa = add i32 %s5, %s10
    271   %sb = add i32 %sa, %s12
    272   %sc = add i32 %sb, %s14  ; (l1 >> 8) + (l1 >> 16) + (l1 >> 24) + (l1 >> 30)
    273   br label %return
    274 return:
    275   %result = phi i32 [0, %entry], [%sc, %if]  ; 0 when %if was skipped, otherwise the shift sum
    276   ret i32 %result
    277 }
    278