Home | History | Annotate | Download | only in X86
      1 //=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the machine model for Sandy Bridge to support instruction
     11 // scheduling and other instruction cost heuristics.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 def SandyBridgeModel : SchedMachineModel { // Core parameters of the SB model.
     16   // All x86 instructions are modeled as a single micro-op, and SB can decode 4
     17   // instructions per cycle.
     18   // FIXME: Identify instructions that aren't a single fused micro-op.
     19   let IssueWidth = 4; // One issue slot per instruction decoded per cycle.
     20   let MicroOpBufferSize = 168; // Based on the reorder buffer.
     21   let LoadLatency = 4; // Same 4 cycles used by WriteLoad and ReadAfterLd below.
     22   let MispredictPenalty = 16; // NOTE(review): presumably in cycles -- confirm.
     23 
     24   // Based on the LSD (loop-stream detector) queue size.
     25   let LoopMicroOpBufferSize = 28;
     26 
     27   // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
     28   // the scheduler to assign a default model to unrecognized opcodes.
     29   let CompleteModel = 0;
     30 }
     31 
     32 let SchedModel = SandyBridgeModel in {
     33 
     34 // Sandy Bridge can issue micro-ops to 6 different ports (0-5) in one cycle.
     35 
     36 // Ports 0, 1, and 5 handle all computation.
     37 def SBPort0 : ProcResource<1>;
     38 def SBPort1 : ProcResource<1>;
     39 def SBPort5 : ProcResource<1>;
     40 
     41 // Ports 2 and 3 are identical. They handle loads and the address half of
     42 // stores.
     43 def SBPort23 : ProcResource<2>; // Two units: ports 2 and 3 modeled together.
     44 
     45 // Port 4 gets the data half of stores. Store data can be available later than
     46 // the store address, but since we don't model the latency of stores, we can
     47 // ignore that.
     48 def SBPort4 : ProcResource<1>;
     49 
     50 // Many micro-ops are capable of issuing on multiple ports.
     51 def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;          // Port 0 or 5.
     52 def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;          // Port 1 or 5.
     53 def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; // Port 0, 1, or 5.
     54 
     55 // 54-entry unified scheduler covering every issue port defined above.
     56 def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
     57   let BufferSize=54;
     58 }
     59 
     60 // Integer division issued on port 0.
     61 def SBDivider : ProcResource<1>; // Held for 10 cycles by WriteIDiv/WriteIDivLd.
     62 
     63 // Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
     64 // cycles after the memory operand.
     65 def : ReadAdvance<ReadAfterLd, 4>; // 4 == LoadLatency in SandyBridgeModel.
     66 
     67 // Many SchedWrites are defined in pairs with and without a folded load.
     68 // Instructions with folded loads are usually micro-fused, so they only appear
     69 // as two micro-ops when queued in the reservation station.
     70 // This multiclass defines the resource usage for variants with and without
     71 // folded loads.
     72 multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW, // Write that has a .Folded form.
     73                           ProcResourceKind ExePort,      // Port or port group it executes on.
     74                           int Lat> {                     // Latency of the register form, in cycles.
     75   // Register variant is using a single cycle on ExePort.
     76   def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
     77 
     78   // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
     79   // latency.
     80   def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
     81      let Latency = !add(Lat, 4); // 4 == the load latency (LoadLatency / WriteLoad).
     82   }
     83 }
     84 
     85 // A folded store needs a cycle on port 4 for the store data, but it does not
     86 // need an extra port 2/3 cycle to recompute the address.
     87 def : WriteRes<WriteRMW, [SBPort4]>;
     88 
     89 def : WriteRes<WriteStore, [SBPort23, SBPort4]>; // Address on port 2/3, data on port 4.
     90 def : WriteRes<WriteLoad,  [SBPort23]> { let Latency = 4; } // Same 4 cycles as LoadLatency.
     91 def : WriteRes<WriteMove,  [SBPort015]>;
     92 def : WriteRes<WriteZero,  []>; // Empty resource list: no port is occupied.
     93 
     94 defm : SBWriteResPair<WriteALU,   SBPort015, 1>; // Integer ops: <write, port/group, latency>.
     95 defm : SBWriteResPair<WriteIMul,  SBPort1,   3>;
     96 def  : WriteRes<WriteIMulH, []> { let Latency = 3; } // No port listed; same latency as WriteIMul.
     97 defm : SBWriteResPair<WriteShift, SBPort05,  1>;
     98 defm : SBWriteResPair<WriteJump,  SBPort5,   1>;
     99 
    100 // This is for simple LEAs with one or two input operands.
    101 // The complex ones can only execute on port 1, and they require two cycles on
    102 // the port to read all inputs. We don't model that.
    103 def : WriteRes<WriteLEA, [SBPort15]>;
    104 
    105 // This is quite rough, latency depends on the dividend.
    106 def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
    107   let Latency = 25;
    108   let ResourceCycles = [1, 10]; // 1 cycle on port 0, 10 on the divider.
    109 }
    110 def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
    111   let Latency = 29; // 25 + 4 for the folded load.
    112   let ResourceCycles = [1, 1, 10]; // Port 2/3, port 0, divider (same order as the list).
    113 }
    114 
    115 // Scalar and vector floating point: <write, port/group, register-form latency>.
    116 defm : SBWriteResPair<WriteFAdd,   SBPort1, 3>;
    117 defm : SBWriteResPair<WriteFMul,   SBPort0, 5>;
    118 defm : SBWriteResPair<WriteFDiv,   SBPort0, 12>; // 10-14 cycles.
    119 defm : SBWriteResPair<WriteFRcp,   SBPort0, 5>;
    120 defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
    121 defm : SBWriteResPair<WriteFSqrt,  SBPort0, 15>;
    122 defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
    123 defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
    124 defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
    125 defm : SBWriteResPair<WriteFShuffle,  SBPort5,  1>;
    126 defm : SBWriteResPair<WriteFBlend,  SBPort05,  1>;
    127 def : WriteRes<WriteFVarBlend, [SBPort0, SBPort5]> {
    128   let Latency = 2;
    129   let ResourceCycles = [1, 1]; // One cycle on each of port 0 and port 5.
    130 }
    131 def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> {
    132   let Latency = 6; // 2 + 4 for the folded load.
    133   let ResourceCycles = [1, 1, 1];
    134 }
    135 
    136 // Vector integer operations: <write, port/group, register-form latency>.
    137 defm : SBWriteResPair<WriteVecShift, SBPort05,  1>;
    138 defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
    139 defm : SBWriteResPair<WriteVecALU,   SBPort15,  1>;
    140 defm : SBWriteResPair<WriteVecIMul,  SBPort0,   5>;
    141 defm : SBWriteResPair<WriteShuffle,  SBPort15,  1>;
    142 defm : SBWriteResPair<WriteBlend,  SBPort15,  1>;
    143 def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
    144   let Latency = 2;
    145   let ResourceCycles = [1, 1]; // One cycle on each of port 1 and port 5.
    146 }
    147 def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> {
    148   let Latency = 6; // 2 + 4 for the folded load.
    149   let ResourceCycles = [1, 1, 1];
    150 }
    151 def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
    152   let Latency = 6;
    153   let ResourceCycles = [1, 1, 1];
    154 }
    155 def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
    156   let Latency = 6; // NOTE(review): same as WriteMPSAD, no +4 for the load -- confirm intended.
    157   let ResourceCycles = [1, 1, 1, 1];
    158 }
    159 
    160 // String instructions. Each Ld form adds one cycle on port 2/3 for the load.
    161 // Packed Compare Implicit Length Strings, Return Mask
    162 def : WriteRes<WritePCmpIStrM, [SBPort015]> {
    163   let Latency = 11;
    164   let ResourceCycles = [3];
    165 }
    166 def : WriteRes<WritePCmpIStrMLd, [SBPort015, SBPort23]> {
    167   let Latency = 11; // NOTE(review): Ld forms here reuse the register latency (no +4) -- confirm.
    168   let ResourceCycles = [3, 1];
    169 }
    170 
    171 // Packed Compare Explicit Length Strings, Return Mask
    172 def : WriteRes<WritePCmpEStrM, [SBPort015]> {
    173   let Latency = 11;
    174   let ResourceCycles = [8];
    175 }
    176 def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
    177   let Latency = 11;
    178   let ResourceCycles = [7, 1]; // 7 on ports 0/1/5 plus 1 on port 2/3 (register form: 8).
    179 }
    180 
    181 // Packed Compare Implicit Length Strings, Return Index
    182 def : WriteRes<WritePCmpIStrI, [SBPort015]> {
    183   let Latency = 3;
    184   let ResourceCycles = [3];
    185 }
    186 def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
    187   let Latency = 3;
    188   let ResourceCycles = [3, 1];
    189 }
    190 
    191 // Packed Compare Explicit Length Strings, Return Index
    192 def : WriteRes<WritePCmpEStrI, [SBPort015]> {
    193   let Latency = 4;
    194   let ResourceCycles = [8];
    195 }
    196 def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
    197   let Latency = 4;
    198   let ResourceCycles = [7, 1]; // 7 on ports 0/1/5 plus 1 on port 2/3 (register form: 8).
    199 }
    200 
    201 // AES instructions. Each Ld form adds one cycle on port 2/3 for the load.
    202 def : WriteRes<WriteAESDecEnc, [SBPort015]> {
    203   let Latency = 8;
    204   let ResourceCycles = [2];
    205 }
    206 def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
    207   let Latency = 8; // NOTE(review): Ld forms here reuse the register latency (no +4) -- confirm.
    208   let ResourceCycles = [2, 1];
    209 }
    210 
    211 def : WriteRes<WriteAESIMC, [SBPort015]> {
    212   let Latency = 8;
    213   let ResourceCycles = [2];
    214 }
    215 def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
    216   let Latency = 8;
    217   let ResourceCycles = [2, 1];
    218 }
    219 
    220 def : WriteRes<WriteAESKeyGen, [SBPort015]> {
    221   let Latency = 8;
    222   let ResourceCycles = [11];
    223 }
    224 def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
    225   let Latency = 8;
    226   let ResourceCycles = [10, 1]; // 10 on ports 0/1/5 plus 1 on port 2/3 (register form: 11).
    227 }
    228 
    229 // Carry-less multiplication instructions.
    230 def : WriteRes<WriteCLMul, [SBPort015]> {
    231   let Latency = 14;
    232   let ResourceCycles = [18];
    233 }
    234 def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
    235   let Latency = 14; // NOTE(review): same as WriteCLMul, no +4 for the load -- confirm intended.
    236   let ResourceCycles = [17, 1]; // 17 on ports 0/1/5 plus 1 on port 2/3 (register form: 18).
    237 }
    238 
    239 
    240 def : WriteRes<WriteSystem,     [SBPort015]> { let Latency = 100; } // NOTE(review): 100 looks like a rough placeholder -- confirm.
    241 def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; } // Same latency and port group as WriteSystem.
    242 def : WriteRes<WriteFence, [SBPort23, SBPort4]>; // Same resources as WriteStore.
    243 def : WriteRes<WriteNop, []>; // Empty resource list: no port is occupied.
    244 
    245 // AVX2 is not supported on this architecture, but we should define the basic
    246 // scheduling resources anyway.
    247 defm : SBWriteResPair<WriteFShuffle256, SBPort0,  1>;
    248 defm : SBWriteResPair<WriteShuffle256, SBPort0,  1>;
    249 defm : SBWriteResPair<WriteVarVecShift, SBPort0,  1>;
    250 } // SchedModel
    251