//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Machine model record for Silvermont (SLM).
def SLMModel : SchedMachineModel {
  // Every x86 instruction is modeled as a single micro-op, and SLM can decode
  // two instructions per cycle.
  let IssueWidth = 2;

  // Sized after the reorder buffer.
  let MicroOpBufferSize = 32;

  // For small loops, expand by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;

  let LoadLatency = 3;
  let MispredictPenalty = 10;
  let PostRAScheduler = 1;

  // FIXME: SSE4 is unimplemented. The model is left marked incomplete so the
  // scheduler can assign a default model to unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = SLMModel in {

// Silvermont has 5 reservation stations for micro-ops.
def IEC_RSV0 : ProcResource<1>;
def IEC_RSV1 : ProcResource<1>;
let BufferSize = 1 in {
def FPC_RSV0 : ProcResource<1>;
def FPC_RSV1 : ProcResource<1>;
}
def MEC_RSV : ProcResource<1>;

// Port groups: many micro-ops can issue on either port of a pair.
def IEC_RSV01 : ProcResGroup<[IEC_RSV0, IEC_RSV1]>;
def FPC_RSV01 : ProcResGroup<[FPC_RSV0, FPC_RSV1]>;

// Single-unit resources consumed by the integer divide, FP multiply and
// FP divide writes defined later in this file.
def SMDivider      : ProcResource<1>;
def SMFPMultiplier : ProcResource<1>;
def SMFPDivider    : ProcResource<1>;

// A load takes 3 cycles, so a ReadAfterLd register operand does not need to be
// available until 3 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;

// Most SchedWrites come in pairs: a register form and a form with a folded
// load. Instructions with folded loads are usually micro-fused, so they only
// appear as two micro-ops when queued in the reservation station.
//
// SMWriteResPair emits the resource usage for both forms of one write:
//   SchedRW - the foldable write; SchedRW.Folded names its folded-load form.
//   ExePort - the execution resource used by both forms.
//   Lat     - latency of the register form.
multiclass SMWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: a single cycle on ExePort.
  let Latency = Lat in
  def : WriteRes<SchedRW, [ExePort]>;

  // Folded-load form: also takes a cycle on MEC_RSV, and the latency grows by
  // the 3 load cycles.
  let Latency = !add(Lat, 3) in
  def : WriteRes<SchedRW.Folded, [MEC_RSV, ExePort]>;
}

// A folded store needs a cycle on MEC_RSV for the store data, but it does not
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [MEC_RSV]>;

// Plain memory and move writes.
def : WriteRes<WriteStore, [IEC_RSV01, MEC_RSV]>;
let Latency = 3 in
def : WriteRes<WriteLoad,  [MEC_RSV]>;
def : WriteRes<WriteMove,  [IEC_RSV01]>;
def : WriteRes<WriteZero,  []>;

// Integer ALU, multiply, shift and jump, each with its folded-load twin.
defm : SMWriteResPair<WriteALU,   IEC_RSV01, 1>;
defm : SMWriteResPair<WriteIMul,  IEC_RSV1,  3>;
defm : SMWriteResPair<WriteShift, IEC_RSV0,  1>;
defm : SMWriteResPair<WriteJump,  IEC_RSV1,  1>;

// This covers simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [IEC_RSV1]>;

// Integer divide. This is quite rough: the latency depends on the dividend.
let Latency = 25, ResourceCycles = [1, 25] in
def : WriteRes<WriteIDiv, [IEC_RSV01, SMDivider]>;

// NOTE(review): 29 is 25 + 4, while the other folded forms in this file add
// 3 cycles for the load - confirm this value is intended.
let Latency = 29, ResourceCycles = [1, 1, 25] in
def : WriteRes<WriteIDivLd, [MEC_RSV, IEC_RSV01, SMDivider]>;

// Scalar and vector floating point.
defm : SMWriteResPair<WriteFAdd,     FPC_RSV1,  3>;
defm : SMWriteResPair<WriteFRcp,     FPC_RSV0,  5>;
defm : SMWriteResPair<WriteFRsqrt,   FPC_RSV0,  5>;
defm : SMWriteResPair<WriteFSqrt,    FPC_RSV0,  15>;
defm : SMWriteResPair<WriteCvtF2I,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtI2F,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtF2F,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteFShuffle, FPC_RSV0,  1>;
defm : SMWriteResPair<WriteFBlend,   FPC_RSV0,  1>;

// FP multiply. This is quite rough: the latency depends on precision.
// The multiplier unit is held for 2 cycles; the folded form adds one MEC_RSV
// cycle and 3 cycles of latency.
let Latency = 5, ResourceCycles = [1, 2] in
def : WriteRes<WriteFMul, [FPC_RSV0, SMFPMultiplier]>;

let Latency = 8, ResourceCycles = [1, 1, 2] in
def : WriteRes<WriteFMulLd, [MEC_RSV, FPC_RSV0, SMFPMultiplier]>;

// FP divide. The divider unit is held for the full 34 cycles; the folded form
// adds one MEC_RSV cycle and 3 cycles of latency.
let Latency = 34, ResourceCycles = [1, 34] in
def : WriteRes<WriteFDiv, [FPC_RSV0, SMFPDivider]>;

let Latency = 37, ResourceCycles = [1, 1, 34] in
def : WriteRes<WriteFDivLd, [MEC_RSV, FPC_RSV0, SMFPDivider]>;

// Vector integer operations.
defm : SMWriteResPair<WriteVecShift, FPC_RSV0,  1>;
defm : SMWriteResPair<WriteVecLogic, FPC_RSV01, 1>;
defm : SMWriteResPair<WriteVecALU,   FPC_RSV01, 1>;
defm : SMWriteResPair<WriteVecIMul,  FPC_RSV0,  4>;
defm : SMWriteResPair<WriteShuffle,  FPC_RSV0,  1>;
defm : SMWriteResPair<WriteBlend,    FPC_RSV0,  1>;
defm : SMWriteResPair<WriteMPSAD,    FPC_RSV0,  7>;

// String instructions.
// For every pair below, the register form occupies FPC_RSV0 for the listed
// number of cycles. The folded-load form additionally takes one cycle on
// MEC_RSV and, like SMWriteResPair and the WriteFMulLd/WriteFDivLd entries in
// this file, adds the 3-cycle load latency. This keeps the folded latencies
// consistent with ReadAdvance<ReadAfterLd, 3>, which lets register operands of
// a folded-load instruction arrive 3 cycles late.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
  let Latency = 13;
  let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 16; // 13 + 3 load cycles.
  let ResourceCycles = [13, 1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [FPC_RSV0]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 20; // 17 + 3 load cycles.
  let ResourceCycles = [17, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [FPC_RSV0]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 20; // 17 + 3 load cycles.
  let ResourceCycles = [17, 1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [FPC_RSV0]> {
  let Latency = 21;
  let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 24; // 21 + 3 load cycles.
  let ResourceCycles = [21, 1];
}

// AES Instructions.
// All three register forms share the same cost: latency 8, with FPC_RSV0 held
// for 5 cycles.
let Latency = 8, ResourceCycles = [5] in {
def : WriteRes<WriteAESDecEnc, [FPC_RSV0]>;
def : WriteRes<WriteAESIMC,    [FPC_RSV0]>;
def : WriteRes<WriteAESKeyGen, [FPC_RSV0]>;
}

// Folded-load AES forms: 8 + 3 load cycles, plus one cycle on MEC_RSV.
let Latency = 11, ResourceCycles = [5, 1] in {
def : WriteRes<WriteAESDecEncLd, [FPC_RSV0, MEC_RSV]>;
def : WriteRes<WriteAESIMCLd,    [FPC_RSV0, MEC_RSV]>;
def : WriteRes<WriteAESKeyGenLd, [FPC_RSV0, MEC_RSV]>;
}

// Carry-less multiplication instructions.
// The register form holds FPC_RSV0 for all 10 cycles.
def : WriteRes<WriteCLMul, [FPC_RSV0]> {
  let Latency = 10;
  let ResourceCycles = [10];
}
// The folded-load form takes one extra cycle on MEC_RSV and adds the 3-cycle
// load latency, matching SMWriteResPair and ReadAdvance<ReadAfterLd, 3>.
def : WriteRes<WriteCLMulLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 13; // 10 + 3 load cycles.
  let ResourceCycles = [10, 1];
}

// System and microcoded instructions get a flat latency of 100.
def : WriteRes<WriteSystem,     [FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence,      [MEC_RSV]>;
def : WriteRes<WriteNop,        []>;

// AVX is not supported on this architecture, but the basic scheduling
// resources should be defined anyway.
// NOTE(review): WriteIMulH is bound to FPC_RSV0 with no explicit latency,
// unlike WriteIMul, which uses IEC_RSV1 - confirm this placeholder is intended.
def : WriteRes<WriteIMulH, [FPC_RSV0]>;
defm : SMWriteResPair<WriteVarBlend,    FPC_RSV0, 1>;
defm : SMWriteResPair<WriteFVarBlend,   FPC_RSV0, 1>;
defm : SMWriteResPair<WriteFShuffle256, FPC_RSV0, 1>;
defm : SMWriteResPair<WriteShuffle256,  FPC_RSV0, 1>;
defm : SMWriteResPair<WriteVarVecShift, FPC_RSV0, 1>;
} // SchedModel