; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
; RUN:          -verify-machineinstrs | FileCheck %s
;
; Verify that the misched resource/latency balancing heuristics are sane.

define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body

; imull folded loads should be in order and interleaved with addl, never
; adjacent. Also check that we have no spilling.
;
; Since the mmult1 IR is already in good order, this effectively ensures that
; the scheduler maintains source order.
;
; CHECK: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK: %end
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32* %arrayidx12.us.i61, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}

; Unlike the above loop, this IR starts out bad and must be
; rescheduled.
;
; CHECK: %for.body
; CHECK-NOT: %rsp
; CHECK: imull 4
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK: %end
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
  nounwind uwtable ssp {
entry:
  br label %for.body
for.body:
  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
  %tmp57 = load i32* %tmp56, align 4
  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
  %tmp58 = load i32* %arrayidx12.us.i61, align 4
  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
  %mul.us.i = mul nsw i32 %tmp58, %tmp57
  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}