; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
; RUN:          -verify-machineinstrs | FileCheck %s
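;
; For the reader: -pre-RA-sched=source keeps the SelectionDAG scheduler in
; source order, -enable-misched turns on the MachineScheduler pass under
; test, and -verify-machineinstrs runs the machine verifier over its output.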
;
; Verify that misched resource/latency balancing heuristics are sane.

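; For orientation, a hedged C sketch of what both kernels below compute
; (names are invented for illustration; the original C source is not part
; of this test). Each iteration forms one element of a row-vector times
; matrix product, with each matrix column passed as a separate pointer:
;
;   void mmult_row_ref(int *out, const int *b, const int *const col[10]) {
;     for (long i = 0; i < 10; ++i) {
;       int sum = 0;
;       for (int k = 0; k < 10; ++k)   /* fully unrolled in the IR below */
;         sum += b[k] * col[k][i];
;       out[i] = sum;
;     }
;   }
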
define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

; imull folded loads should be in order and interleaved with addl, never
; adjacent. Also check that we have no spilling.
;
; Since the mmult1 IR is already in good order, this effectively ensures
; that the scheduler maintains source order.
;
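; Purely illustrative (registers and stack offsets invented), the CHECK
; lines below enforce an interleaved shape such as
;   imull 4(%rsi), %ecx
;   addl  %ecx, %eax
;   imull 8(%rsi), %edx
;   addl  %edx, %eax
; rather than a run of imulls followed by a run of addls.
;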
; CHECK: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK: %end
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32* %arrayidx12.us.i61, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; Unlike the above loop, this IR starts out in a bad order and must be
; rescheduled.
;
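; Concretely, in the loop body below nearly all of the loads are hoisted to
; the top of the block while the mul/add reduction chain (%mul.us.i.1
; through %add.us.i.9) is clumped at the bottom. The machine scheduler must
; re-interleave the multiplies and adds, which the CHECK lines below verify.
;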
; CHECK: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK: %end
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32* %arrayidx12.us.i61, align 4
  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}