Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the itinerary class data for the POWER7 processor.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 // Primary reference:
     15 // IBM POWER7 multicore server processor
     16 // B. Sinharoy, et al.
     17 // IBM J. Res. & Dev. (55) 3. May/June 2011.
     18 
     19 // Scheduling for the P7 involves tracking two types of resources:
     20 //  1. The dispatch bundle slots
     21 //  2. The functional unit resources
     22 
     23 // Dispatch units: the six slots of a dispatch bundle.
        // In the itinerary table below, P7_DU5 and P7_DU6 are referenced only by
        // IIC_BrB, IIC_BrMCR and the first IIC_BrMCRX entry; every other itinerary
        // uses P7_DU1 through P7_DU4.
     24 def P7_DU1    : FuncUnit;
     25 def P7_DU2    : FuncUnit;
     26 def P7_DU3    : FuncUnit;
     27 def P7_DU4    : FuncUnit;
     28 def P7_DU5    : FuncUnit;
     29 def P7_DU6    : FuncUnit;
     30 
        // Execution pipelines. Simple integer ops (IIC_IntSimple) may use either
        // the FX or the LS pipelines; all other integer classes use FX only.
     31 def P7_LS1    : FuncUnit; // Load/Store pipeline 1
     32 def P7_LS2    : FuncUnit; // Load/Store pipeline 2
     33 
     34 def P7_FX1    : FuncUnit; // FX pipeline 1
     35 def P7_FX2    : FuncUnit; // FX pipeline 2
     36 
     37 // VS pipeline 1 (vector integer ops. always here)
     38 def P7_VS1    : FuncUnit; // VS pipeline 1
     39 // VS pipeline 2 (128-bit stores and perms. here)
     40 def P7_VS2    : FuncUnit; // VS pipeline 2
     41 
     42 def P7_CRU    : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
     43 def P7_BRU    : FuncUnit; // BR unit
     44 
     45 // Notes:
     46 // Each LSU pipeline can also execute FX add and logical instructions.
     47 // Each LSU pipeline can complete a load or store in one cycle.
     48 //
     49 // Each store is broken into two parts, AGEN goes to the LSU while a
     50 // "data steering" op. goes to the FXU or VSU.
     51 //
     52 // FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
     53 // VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
     54 //
     55 // Frequent FX ops. take only one cycle and results can be used again in the
     56 // next cycle (there is a self-bypass). Getting results from the other FX
     57 // pipeline takes an additional cycle.
     58 //
     59 // The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
     60 // (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
     61 // Dispatch of an instruction to VS1 that uses four single prec. inputs
     62 // (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any
     63 // floating point instruction.
     64 //
     65 // The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
     66 // (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
     67 // (unlike on the POWER6).
     68 //
     69 // FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
     70 // share the same write-back, and have a 5-cycle latency difference, so the
     71 // IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
     72 // op. has been dispatched to VS1.
     73 //
     74 // Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
     75 //
     76 // Instruction dispatch groups have (at most) four non-branch instructions, and
     77 // two branches. Unlike on the POWER4/5, a branch does not automatically
     78 // end the dispatch group, but a second branch must be the last in the group.
     79 
     80 def P7Itineraries : ProcessorItineraries<
        // Itinerary table for the POWER7. The template arguments are, in order:
        // the functional units, the bypass list (empty here), and one
        // InstrItinData entry per itinerary class.
        //
        // Reading an entry (see LLVM's TargetItinerary.td):
        //  - InstrStage<N, [units]> occupies one of the listed units for N cycles.
        //    A trailing ", 0" sets the time increment to the next stage to zero,
        //    so the following stage starts in the same cycle; without it the
        //    next stage starts once this stage's N cycles have elapsed.
        //  - The final integer list holds the operand cycles: for each operand
        //    index, the cycle after issue at which that operand is defined or
        //    read.
        //
        // Entries that list several single-unit dispatch stages (e.g. P7_DU1 and
        // P7_DU2, each with ", 0") reserve all of those dispatch slots at once.
     81   [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
     82    P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
     83   InstrItinData<IIC_IntSimple   , [InstrStage<1, [P7_DU1, P7_DU2,
     84                                                   P7_DU3, P7_DU4], 0>,
     85                                    InstrStage<1, [P7_FX1, P7_FX2,
     86                                                   P7_LS1, P7_LS2]>],
     87                                   [1, 1, 1]>,
     88   InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P7_DU1, P7_DU2,
     89                                                   P7_DU3, P7_DU4], 0>,
     90                                    InstrStage<1, [P7_FX1, P7_FX2]>],
     91                                   [1, 1, 1]>,
     92   InstrItinData<IIC_IntISEL,      [InstrStage<1, [P7_DU1], 0>,
     93                                    InstrStage<1, [P7_FX1, P7_FX2], 0>,
     94                                    InstrStage<1, [P7_BRU]>],
     95                                   [1, 1, 1, 1]>,
     96   InstrItinData<IIC_IntCompare  , [InstrStage<1, [P7_DU1, P7_DU2,
     97                                                   P7_DU3, P7_DU4], 0>,
     98                                    InstrStage<1, [P7_FX1, P7_FX2]>],
     99                                   [1, 1, 1]>,
    100   // FIXME: Add record-form itinerary data.
          // The divides below keep an FX pipeline reserved for the whole
          // operation (36 cycles for IIC_IntDivW, 68 for IIC_IntDivD).
    101   InstrItinData<IIC_IntDivW     , [InstrStage<1, [P7_DU1], 0>,
    102                                    InstrStage<1, [P7_DU2], 0>,
    103                                    InstrStage<36, [P7_FX1, P7_FX2]>],
    104                                   [36, 1, 1]>,
    105   InstrItinData<IIC_IntDivD     , [InstrStage<1, [P7_DU1], 0>,
    106                                    InstrStage<1, [P7_DU2], 0>,
    107                                    InstrStage<68, [P7_FX1, P7_FX2]>],
    108                                   [68, 1, 1]>,
    109   InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P7_DU1, P7_DU2,
    110                                                   P7_DU3, P7_DU4], 0>,
    111                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    112                                   [4, 1, 1]>,
    113   InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P7_DU1, P7_DU2,
    114                                                   P7_DU3, P7_DU4], 0>,
    115                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    116                                   [4, 1, 1]>,
    117   InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P7_DU1, P7_DU2,
    118                                                   P7_DU3, P7_DU4], 0>,
    119                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    120                                   [4, 1, 1]>,
    121   InstrItinData<IIC_IntRotate   , [InstrStage<1, [P7_DU1, P7_DU2,
    122                                                   P7_DU3, P7_DU4], 0>,
    123                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    124                                    [1, 1, 1]>,
    125   InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P7_DU1, P7_DU2,
    126                                                   P7_DU3, P7_DU4], 0>,
    127                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    128                                    [1, 1, 1]>,
    129   InstrItinData<IIC_IntShift    , [InstrStage<1, [P7_DU1, P7_DU2,
    130                                                   P7_DU3, P7_DU4], 0>,
    131                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    132                                   [1, 1, 1]>,
    133   InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P7_DU1, P7_DU2,
    134                                                   P7_DU3, P7_DU4], 0>,
    135                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    136                                   [1, 1]>,
    137   InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P7_DU1, P7_DU2,
    138                                                   P7_DU3, P7_DU4], 0>,
    139                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    140                                   [1, 1]>,
          // Branch and CR classes. Only these use the P7_DU5/P7_DU6 slots.
    141   InstrItinData<IIC_BrB         , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
    142                                    InstrStage<1, [P7_BRU]>],
    143                                   [3, 1, 1]>,
    144   InstrItinData<IIC_BrCR        , [InstrStage<1, [P7_DU1], 0>,
    145                                    InstrStage<1, [P7_CRU]>],
    146                                   [3, 1, 1]>,
    147   InstrItinData<IIC_BrMCR       , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
    148                                    InstrStage<1, [P7_BRU]>],
    149                                   [3, 1, 1]>,
          // NOTE(review): IIC_BrMCRX has a second entry further down in this
          // table (the one tagged "mtcr") with different stages and operand
          // cycles. Two entries for one itinerary class are ambiguous; confirm
          // which one the TableGen backend keeps and whether one of them was
          // meant for a different class.
    150   InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
    151                                    InstrStage<1, [P7_BRU]>],
    152                                   [3, 1, 1]>,
          // Loads. Update forms additionally reserve an FX pipeline.
    153   InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P7_DU1, P7_DU2,
    154                                                   P7_DU3, P7_DU4], 0>,
    155                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    156                                   [2, 1, 1]>,
    157   InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
    158                                    InstrStage<1, [P7_DU2], 0>,
    159                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    160                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    161                                   [2, 2, 1, 1]>,
    162   InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
    163                                    InstrStage<1, [P7_DU2], 0>,
    164                                    InstrStage<1, [P7_DU3], 0>,
    165                                    InstrStage<1, [P7_DU4], 0>,
    166                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    167                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    168                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    169                                   [3, 3, 1, 1]>,
    170   InstrItinData<IIC_LdStLD      , [InstrStage<1, [P7_DU1, P7_DU2,
    171                                                   P7_DU3, P7_DU4], 0>,
    172                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    173                                   [2, 1, 1]>,
    174   InstrItinData<IIC_LdStLDU     , [InstrStage<1, [P7_DU1], 0>,
    175                                    InstrStage<1, [P7_DU2], 0>,
    176                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    177                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    178                                   [2, 2, 1, 1]>,
    179   InstrItinData<IIC_LdStLDUX    , [InstrStage<1, [P7_DU1], 0>,
    180                                    InstrStage<1, [P7_DU2], 0>,
    181                                    InstrStage<1, [P7_DU3], 0>,
    182                                    InstrStage<1, [P7_DU4], 0>,
    183                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    184                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    185                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    186                                   [3, 3, 1, 1]>,
    187   InstrItinData<IIC_LdStLFD     , [InstrStage<1, [P7_DU1, P7_DU2,
    188                                                   P7_DU3, P7_DU4], 0>,
    189                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    190                                   [3, 1, 1]>,
    191   InstrItinData<IIC_LdStLVecX   , [InstrStage<1, [P7_DU1, P7_DU2,
    192                                                   P7_DU3, P7_DU4], 0>,
    193                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    194                                   [3, 1, 1]>,
    195   InstrItinData<IIC_LdStLFDU    , [InstrStage<1, [P7_DU1], 0>,
    196                                    InstrStage<1, [P7_DU2], 0>,
    197                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    198                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    199                                   [3, 3, 1, 1]>,
    200   InstrItinData<IIC_LdStLFDUX   , [InstrStage<1, [P7_DU1], 0>,
    201                                    InstrStage<1, [P7_DU2], 0>,
    202                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    203                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    204                                   [3, 3, 1, 1]>,
    205   InstrItinData<IIC_LdStLHA     , [InstrStage<1, [P7_DU1], 0>,
    206                                    InstrStage<1, [P7_DU2], 0>,
    207                                    InstrStage<1, [P7_LS1, P7_LS2]>,
    208                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    209                                   [3, 1, 1]>,
    210   InstrItinData<IIC_LdStLHAU    , [InstrStage<1, [P7_DU1], 0>,
    211                                    InstrStage<1, [P7_DU2], 0>,
    212                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    213                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    214                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    215                                   [4, 4, 1, 1]>,
    216   InstrItinData<IIC_LdStLHAUX   , [InstrStage<1, [P7_DU1], 0>,
    217                                    InstrStage<1, [P7_DU2], 0>,
    218                                    InstrStage<1, [P7_DU3], 0>,
    219                                    InstrStage<1, [P7_DU4], 0>,
    220                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    221                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    222                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    223                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    224                                   [4, 4, 1, 1]>,
    225   InstrItinData<IIC_LdStLWA     , [InstrStage<1, [P7_DU1], 0>,
    226                                    InstrStage<1, [P7_DU2], 0>,
    227                                    InstrStage<1, [P7_LS1, P7_LS2]>,
    228                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    229                                   [3, 1, 1]>,
    230   InstrItinData<IIC_LdStLWARX,    [InstrStage<1, [P7_DU1], 0>,
    231                                    InstrStage<1, [P7_DU2], 0>,
    232                                    InstrStage<1, [P7_DU3], 0>,
    233                                    InstrStage<1, [P7_DU4], 0>,
    234                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    235                                   [3, 1, 1]>,
    236   InstrItinData<IIC_LdStLDARX,    [InstrStage<1, [P7_DU1], 0>,
    237                                    InstrStage<1, [P7_DU2], 0>,
    238                                    InstrStage<1, [P7_DU3], 0>,
    239                                    InstrStage<1, [P7_DU4], 0>,
    240                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    241                                   [3, 1, 1]>,
    242   InstrItinData<IIC_LdStLMW     , [InstrStage<1, [P7_DU1, P7_DU2,
    243                                                   P7_DU3, P7_DU4], 0>,
    244                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    245                                   [2, 1, 1]>,
          // Stores. Each store reserves an LS pipeline together with an FX or VS
          // pipeline in the same cycle (AGEN plus the "data steering" op.).
    246   InstrItinData<IIC_LdStStore   , [InstrStage<1, [P7_DU1, P7_DU2,
    247                                                   P7_DU3, P7_DU4], 0>,
    248                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    249                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    250                                   [1, 1, 1]>,
    251   InstrItinData<IIC_LdStSTD     , [InstrStage<1, [P7_DU1, P7_DU2,
    252                                                   P7_DU3, P7_DU4], 0>,
    253                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    254                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    255                                   [1, 1, 1]>,
    256   InstrItinData<IIC_LdStSTDU    , [InstrStage<1, [P7_DU1], 0>,
    257                                    InstrStage<1, [P7_DU2], 0>,
    258                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    259                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    260                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    261                                   [2, 1, 1, 1]>,
    262   InstrItinData<IIC_LdStSTDUX   , [InstrStage<1, [P7_DU1], 0>,
    263                                    InstrStage<1, [P7_DU2], 0>,
    264                                    InstrStage<1, [P7_DU3], 0>,
    265                                    InstrStage<1, [P7_DU4], 0>,
    266                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    267                                    InstrStage<1, [P7_FX1, P7_FX2]>,
    268                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    269                                   [2, 1, 1, 1]>,
    270   InstrItinData<IIC_LdStSTFD    , [InstrStage<1, [P7_DU1, P7_DU2,
    271                                                   P7_DU3, P7_DU4], 0>,
    272                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    273                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    274                                   [1, 1, 1]>,
    275   InstrItinData<IIC_LdStSTFDU   , [InstrStage<1, [P7_DU1], 0>,
    276                                    InstrStage<1, [P7_DU2], 0>,
    277                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    278                                    InstrStage<1, [P7_FX1, P7_FX2], 0>,
    279                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    280                                   [2, 1, 1, 1]>,
    281   InstrItinData<IIC_LdStSTVEBX  , [InstrStage<1, [P7_DU1, P7_DU2,
    282                                                   P7_DU3, P7_DU4], 0>,
    283                                    InstrStage<1, [P7_LS1, P7_LS2], 0>,
    284                                    InstrStage<1, [P7_VS2]>],
    285                                   [1, 1, 1]>,
    286   InstrItinData<IIC_LdStSTDCX   , [InstrStage<1, [P7_DU1], 0>,
    287                                    InstrStage<1, [P7_DU2], 0>,
    288                                    InstrStage<1, [P7_DU3], 0>,
    289                                    InstrStage<1, [P7_DU4], 0>,
    290                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    291                                   [1, 1, 1]>,
    292   InstrItinData<IIC_LdStSTWCX   , [InstrStage<1, [P7_DU1], 0>,
    293                                    InstrStage<1, [P7_DU2], 0>,
    294                                    InstrStage<1, [P7_DU3], 0>,
    295                                    InstrStage<1, [P7_DU4], 0>,
    296                                    InstrStage<1, [P7_LS1, P7_LS2]>],
    297                                   [1, 1, 1]>,
          // NOTE(review): second IIC_BrMCRX entry in this table; an earlier one
          // dispatches through P7_DU5/P7_DU6 to P7_BRU. This one reserves all
          // four of P7_DU1..P7_DU4 and runs on P7_CRU and then an FX pipeline.
          // Confirm which entry is intended to take effect.
    298   InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P7_DU1], 0>,
    299                                    InstrStage<1, [P7_DU2], 0>,
    300                                    InstrStage<1, [P7_DU3], 0>,
    301                                    InstrStage<1, [P7_DU4], 0>,
    302                                    InstrStage<1, [P7_CRU]>,
    303                                    InstrStage<1, [P7_FX1, P7_FX2]>],
    304                                   [3, 1]>, // mtcr
    305   InstrItinData<IIC_SprMFCR     , [InstrStage<1, [P7_DU1], 0>,
    306                                    InstrStage<1, [P7_CRU]>],
    307                                   [6, 1]>,
    308   InstrItinData<IIC_SprMFCRF    , [InstrStage<1, [P7_DU1], 0>,
    309                                    InstrStage<1, [P7_CRU]>],
    310                                   [3, 1]>,
    311   InstrItinData<IIC_SprMTSPR    , [InstrStage<1, [P7_DU1], 0>,
    312                                    InstrStage<1, [P7_FX1]>],
    313                                   [4, 1]>, // mtctr
          // Scalar floating point: either VS pipeline. Note that the long
          // divide/sqrt latencies appear only in the operand cycles; the VS
          // stage itself is a single cycle.
    314   InstrItinData<IIC_FPGeneral   , [InstrStage<1, [P7_DU1, P7_DU2,
    315                                                   P7_DU3, P7_DU4], 0>,
    316                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    317                                   [5, 1, 1]>,
    318   InstrItinData<IIC_FPAddSub    , [InstrStage<1, [P7_DU1, P7_DU2,
    319                                                   P7_DU3, P7_DU4], 0>,
    320                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    321                                   [5, 1, 1]>,
    322   InstrItinData<IIC_FPCompare   , [InstrStage<1, [P7_DU1, P7_DU2,
    323                                                   P7_DU3, P7_DU4], 0>,
    324                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    325                                   [8, 1, 1]>,
    326   InstrItinData<IIC_FPDivD      , [InstrStage<1, [P7_DU1, P7_DU2,
    327                                                   P7_DU3, P7_DU4], 0>,
    328                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    329                                   [33, 1, 1]>,
    330   InstrItinData<IIC_FPDivS      , [InstrStage<1, [P7_DU1, P7_DU2,
    331                                                   P7_DU3, P7_DU4], 0>,
    332                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    333                                   [27, 1, 1]>,
    334   InstrItinData<IIC_FPSqrtD     , [InstrStage<1, [P7_DU1, P7_DU2,
    335                                                   P7_DU3, P7_DU4], 0>,
    336                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    337                                   [44, 1, 1]>,
    338   InstrItinData<IIC_FPSqrtS     , [InstrStage<1, [P7_DU1, P7_DU2,
    339                                                   P7_DU3, P7_DU4], 0>,
    340                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    341                                   [32, 1, 1]>,
    342   InstrItinData<IIC_FPFused     , [InstrStage<1, [P7_DU1, P7_DU2,
    343                                                   P7_DU3, P7_DU4], 0>,
    344                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    345                                   [5, 1, 1, 1]>,
    346   InstrItinData<IIC_FPRes       , [InstrStage<1, [P7_DU1, P7_DU2,
    347                                                   P7_DU3, P7_DU4], 0>,
    348                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    349                                   [5, 1, 1]>,
          // Vector classes. General/shift/complex ops are pinned to P7_VS1 and
          // permutes to P7_VS2; vector FP may use either VS pipeline.
    350   InstrItinData<IIC_VecGeneral  , [InstrStage<1, [P7_DU1], 0>,
    351                                    InstrStage<1, [P7_VS1]>],
    352                                   [2, 1, 1]>,
    353   InstrItinData<IIC_VecVSL      , [InstrStage<1, [P7_DU1], 0>,
    354                                    InstrStage<1, [P7_VS1]>],
    355                                   [2, 1, 1]>,
    356   InstrItinData<IIC_VecVSR      , [InstrStage<1, [P7_DU1], 0>,
    357                                    InstrStage<1, [P7_VS1]>],
    358                                   [2, 1, 1]>,
    359   InstrItinData<IIC_VecFP       , [InstrStage<1, [P7_DU1], 0>,
    360                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    361                                   [6, 1, 1]>,
    362   InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
    363                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    364                                   [6, 1, 1]>,
    365   InstrItinData<IIC_VecFPRound  , [InstrStage<1, [P7_DU1], 0>,
    366                                    InstrStage<1, [P7_VS1, P7_VS2]>],
    367                                   [6, 1, 1]>,
    368   InstrItinData<IIC_VecComplex  , [InstrStage<1, [P7_DU1], 0>,
    369                                    InstrStage<1, [P7_VS1]>],
    370                                   [7, 1, 1]>,
    371   InstrItinData<IIC_VecPerm     , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
    372                                    InstrStage<1, [P7_VS2]>],
    373                                   [3, 1, 1]>
    374 ]>;
    375 
    376 // ===---------------------------------------------------------------------===//
    377 // P7 machine model for scheduling and other instruction cost heuristics.
    378 
    379 def P7Model : SchedMachineModel {
          // Machine model for the POWER7; per-instruction resource and latency
          // data comes from P7Itineraries.
    380   let IssueWidth = 6;  // The dispatch bundle size: up to 4 non-branch
    381                        // instructions plus up to 2 branches are dispatched
    382                        // per cycle. The total internal issue bandwidth per
    383                        // cycle (from all queues) is 8.
    384 
    385   let MinLatency = 0;  // Out-of-order dispatch.
    386   let LoadLatency = 3; // Optimistic load latency assuming bypass.
    387                        // This is overridden by OperandCycles if the
    388                        // Itineraries are queried instead.
    389   let MispredictPenalty = 16; // Cost, in cycles, of a mispredicted branch.
    390 
    391   // Try to make sure we have at least 10 dispatch groups in a loop
          // (presumably 10 groups of 4 non-branch instructions each).
    392   let LoopMicroOpBufferSize = 40;
    393 
    394   let Itineraries = P7Itineraries;
    395 }
    396 
    397