Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the itinerary class data for the POWER8 processor.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 // Scheduling for the P8 involves tracking two types of resources:
     15 //  1. The dispatch bundle slots
     16 //  2. The functional unit resources
     17 
     18 // Dispatch units:
     19 def P8_DU1    : FuncUnit; // Dispatch slot 1 (non-branch)
     20 def P8_DU2    : FuncUnit; // Dispatch slot 2 (non-branch)
     21 def P8_DU3    : FuncUnit; // Dispatch slot 3 (non-branch)
     22 def P8_DU4    : FuncUnit; // Dispatch slot 4 (non-branch)
     23 def P8_DU5    : FuncUnit; // Dispatch slot 5 (non-branch)
     24 def P8_DU6    : FuncUnit; // Dispatch slot 6 (non-branch)
     25 def P8_DU7    : FuncUnit; // Only branch instructions will use DU7,DU8
     26 def P8_DU8    : FuncUnit; // Second branch-only dispatch slot
     27 
     28 // 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
     29 
     30 def P8_LU1     : FuncUnit; // Load pipeline 1 (loads or fixed-point operations)
     31 def P8_LU2     : FuncUnit; // Load pipeline 2 (loads or fixed-point operations)
     32 
     33 // Load/Store pipelines can handle stores, fixed-point loads, and simple
     34 // fixed-point operations.
     35 def P8_LSU1    : FuncUnit; // Load/Store pipeline 1
     36 def P8_LSU2    : FuncUnit; // Load/Store pipeline 2
     37 
     38 // Fixed-point units
     39 def P8_FXU1    : FuncUnit; // FX pipeline 1
     40 def P8_FXU2    : FuncUnit; // FX pipeline 2
     41 
     42 // The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
     43 // are combined on P7 and newer into a Vector Scalar Unit (VSU).
     44 // The P8 instruction latency documents still refer to the unit as the
     45 // FPU, so keep in mind that FPU==VSU.
     46 // In contrast to the P7, the VMX units on P8 are symmetric, so there is no
     47 // need to split vector integer ops or 128-bit load/store/perms to specific units.
     48 def P8_FPU1    : FuncUnit; // VS pipeline 1
     49 def P8_FPU2    : FuncUnit; // VS pipeline 2
     50 
     51 def P8_CRU    : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
     52 def P8_BRU    : FuncUnit; // BR unit (branches)
     53 
     54 def P8Itineraries : ProcessorItineraries< // unit list, empty list, then per-class itinerary data
     55   [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
     56    P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
     57    P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
     58   InstrItinData<IIC_IntSimple   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     59                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     60                                    InstrStage<1, [P8_FXU1, P8_FXU2,
     61                                                   P8_LU1, P8_LU2,
     62                                                   P8_LSU1, P8_LSU2]>],
     63                                   [1, 1, 1]>,
     64   InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     65                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     66                                    InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
     67                                                   P8_LU2, P8_LSU1, P8_LSU2]>],
     68                                   [1, 1, 1]>,
     69   InstrItinData<IIC_IntISEL,      [InstrStage<1, [P8_DU1], 0>,
     70                                    InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
     71                                    InstrStage<1, [P8_BRU]>],
     72                                   [1, 1, 1, 1]>,
     73   InstrItinData<IIC_IntCompare  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     74                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     75                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
     76                                   [1, 1, 1]>,
     77   InstrItinData<IIC_IntDivW     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     78                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     79                                    InstrStage<15, [P8_FXU1, P8_FXU2]>],
     80                                   [15, 1, 1]>,
     81   InstrItinData<IIC_IntDivD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     82                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     83                                    InstrStage<23, [P8_FXU1, P8_FXU2]>],
     84                                   [23, 1, 1]>,
     85   InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     86                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     87                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
     88                                   [4, 1, 1]>,
     89   InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     90                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     91                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
     92                                   [4, 1, 1]>,
     93   InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     94                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     95                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
     96                                   [4, 1, 1]>,
     97   InstrItinData<IIC_IntRotate   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
     98                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
     99                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    100                                    [1, 1, 1]>,
    101   InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    102                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    103                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    104                                    [1, 1, 1]>,
    105   InstrItinData<IIC_IntShift    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    106                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    107                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    108                                   [1, 1, 1]>,
    109   InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    110                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    111                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    112                                   [1, 1]>,
    113   InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    114                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    115                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    116                                   [1, 1]>,
    117   InstrItinData<IIC_BrB         , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
    118                                    InstrStage<1, [P8_BRU]>],
    119                                   [3, 1, 1]>,
    120   // FIXME - the Br* groups below are not branch related, so should probably
    121   // be renamed.
    122   // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.)
    123   // and should be 'First' in dispatch.
    124   InstrItinData<IIC_BrCR        , [InstrStage<1, [P8_DU1], 0>,
    125                                    InstrStage<1, [P8_CRU]>],
    126                                   [3, 1, 1]>,
    127   // IIC_BrMCR consists of the mcrf instruction.
    128   InstrItinData<IIC_BrMCR       , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    129                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    130                                    InstrStage<1, [P8_CRU]>],
    131                                   [3, 1, 1]>,
    132   // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
    133   // should be first in the dispatch group.
    134   InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
    135                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    136                                   [3, 1, 1]>,
    137   InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
    138                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    139                                   [3, 1]>, // NOTE(review): second IIC_BrMCRX entry - confirm which one is intended
    140   InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    141                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    142                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    143                                                   P8_LU1, P8_LU2]>],
    144                                   [2, 1, 1]>,
    145   InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
    146                                    InstrStage<1, [P8_DU2], 0>,
    147                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    148                                                   P8_LU1, P8_LU2 ], 0>,
    149                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    150                                   [2, 2, 1, 1]>,
    151   // Update-Indexed form loads/stores are no longer first and last in the
    152   // dispatch group.  They are simply cracked, so require DU1,DU2.
    153   InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
    154                                    InstrStage<1, [P8_DU2], 0>,
    155                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    156                                                   P8_LU1, P8_LU2], 0>,
    157                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    158                                   [3, 3, 1, 1]>,
    159   InstrItinData<IIC_LdStLD      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    160                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    161                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    162                                                   P8_LU1, P8_LU2]>],
    163                                   [2, 1, 1]>,
    164   InstrItinData<IIC_LdStLDU     , [InstrStage<1, [P8_DU1], 0>,
    165                                    InstrStage<1, [P8_DU2], 0>,
    166                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    167                                                   P8_LU1, P8_LU2], 0>,
    168                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    169                                   [2, 2, 1, 1]>,
    170   InstrItinData<IIC_LdStLDUX    , [InstrStage<1, [P8_DU1], 0>,
    171                                    InstrStage<1, [P8_DU2], 0>,
    172                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    173                                                   P8_LU1, P8_LU2], 0>,
    174                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    175                                   [3, 3, 1, 1]>,
    176   InstrItinData<IIC_LdStLFD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    177                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    178                                    InstrStage<1, [P8_LU1, P8_LU2]>],
    179                                   [3, 1, 1]>,
    180   InstrItinData<IIC_LdStLVecX   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    181                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    182                                    InstrStage<1, [P8_LU1, P8_LU2]>],
    183                                   [3, 1, 1]>,
    184   InstrItinData<IIC_LdStLFDU    , [InstrStage<1, [P8_DU1], 0>,
    185                                    InstrStage<1, [P8_DU2], 0>,
    186                                    InstrStage<1, [P8_LU1, P8_LU2], 0>,
    187                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    188                                   [3, 3, 1, 1]>,
    189   InstrItinData<IIC_LdStLFDUX   , [InstrStage<1, [P8_DU1], 0>,
    190                                    InstrStage<1, [P8_DU2], 0>,
    191                                    InstrStage<1, [P8_LU1, P8_LU2], 0>,
    192                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    193                                   [3, 3, 1, 1]>,
    194   InstrItinData<IIC_LdStLHA     , [InstrStage<1, [P8_DU1], 0>,
    195                                    InstrStage<1, [P8_DU2], 0>,
    196                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    197                                                   P8_LU1, P8_LU2], 0>,
    198                                    InstrStage<1, [P8_FXU1, P8_FXU2,
    199                                                   P8_LU1, P8_LU2]>],
    200                                   [3, 1, 1]>,
    201   InstrItinData<IIC_LdStLHAU    , [InstrStage<1, [P8_DU1], 0>,
    202                                    InstrStage<1, [P8_DU2], 0>,
    203                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    204                                                   P8_LU1, P8_LU2], 0>,
    205                                    InstrStage<1, [P8_FXU1, P8_FXU2]>,
    206                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    207                                   [4, 4, 1, 1]>,
    208   // first+last in dispatch group.
    209   InstrItinData<IIC_LdStLHAUX   , [InstrStage<1, [P8_DU1], 0>,
    210                                    InstrStage<1, [P8_DU2], 0>,
    211                                    InstrStage<1, [P8_DU3], 0>,
    212                                    InstrStage<1, [P8_DU4], 0>,
    213                                    InstrStage<1, [P8_DU5], 0>,
    214                                    InstrStage<1, [P8_DU6], 0>,
    215                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    216                                                   P8_LU1, P8_LU2], 0>,
    217                                    InstrStage<1, [P8_FXU1, P8_FXU2]>,
    218                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    219                                   [4, 4, 1, 1]>,
    220   InstrItinData<IIC_LdStLWA     , [InstrStage<1, [P8_DU1], 0>,
    221                                    InstrStage<1, [P8_DU2], 0>,
    222                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    223                                                   P8_LU1, P8_LU2]>,
    224                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    225                                   [3, 1, 1]>,
    226   InstrItinData<IIC_LdStLWARX,    [InstrStage<1, [P8_DU1], 0>,
    227                                    InstrStage<1, [P8_DU2], 0>,
    228                                    InstrStage<1, [P8_DU3], 0>,
    229                                    InstrStage<1, [P8_DU4], 0>,
    230                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    231                                                   P8_LU1, P8_LU2]>],
    232                                   [3, 1, 1]>,
    233   // first+last
    234   InstrItinData<IIC_LdStLDARX,    [InstrStage<1, [P8_DU1], 0>,
    235                                    InstrStage<1, [P8_DU2], 0>,
    236                                    InstrStage<1, [P8_DU3], 0>,
    237                                    InstrStage<1, [P8_DU4], 0>,
    238                                    InstrStage<1, [P8_DU5], 0>,
    239                                    InstrStage<1, [P8_DU6], 0>,
    240                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    241                                                   P8_LU1, P8_LU2]>],
    242                                   [3, 1, 1]>,
    243   InstrItinData<IIC_LdStLMW     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    244                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    245                                    InstrStage<1, [P8_LSU1, P8_LSU2,
    246                                                   P8_LU1, P8_LU2]>],
    247                                   [2, 1, 1]>,
    248 // Stores are dual-issued from the issue queue, so may only take up one
    249 // dispatch slot.  The instruction will be broken into two IOPS. The agen
    250 // op is issued to the LSU, and the data op (register fetch) is issued
    251 // to either the LU (GPR store) or the VSU (FPR store).
    252   InstrItinData<IIC_LdStStore   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    253                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    254                                    InstrStage<1, [P8_LSU1, P8_LSU2]>,
    255                                    InstrStage<1, [P8_LU1, P8_LU2]>],
    256                                   [1, 1, 1]>,
    257   InstrItinData<IIC_LdStSTD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    258                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    259                                    InstrStage<1, [P8_LU1, P8_LU2,
    260                                                   P8_LSU1, P8_LSU2]>], // comma required: else next line parses as a list slice
    261                                   [1, 1, 1]>,
    262   InstrItinData<IIC_LdStSTDU    , [InstrStage<1, [P8_DU1], 0>,
    263                                    InstrStage<1, [P8_DU2], 0>,
    264                                    InstrStage<1, [P8_LU1, P8_LU2,
    265                                                   P8_LSU1, P8_LSU2], 0>,
    266                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    267                                   [2, 1, 1, 1]>,
    268   // First+last
    269   InstrItinData<IIC_LdStSTDUX   , [InstrStage<1, [P8_DU1], 0>,
    270                                    InstrStage<1, [P8_DU2], 0>,
    271                                    InstrStage<1, [P8_DU3], 0>,
    272                                    InstrStage<1, [P8_DU4], 0>,
    273                                    InstrStage<1, [P8_DU5], 0>,
    274                                    InstrStage<1, [P8_DU6], 0>,
    275                                    InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
    276                                    InstrStage<1, [P8_FXU1, P8_FXU2]>,
    277                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    278                                   [2, 1, 1, 1]>,
    279   InstrItinData<IIC_LdStSTFD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    280                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    281                                    InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
    282                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    283                                   [1, 1, 1]>,
    284   InstrItinData<IIC_LdStSTFDU   , [InstrStage<1, [P8_DU1], 0>,
    285                                    InstrStage<1, [P8_DU2], 0>,
    286                                    InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
    287                                    InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
    288                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    289                                   [2, 1, 1, 1]>,
    290   InstrItinData<IIC_LdStSTVEBX  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    291                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    292                                    InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
    293                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    294                                   [1, 1, 1]>,
    295   InstrItinData<IIC_LdStSTDCX   , [InstrStage<1, [P8_DU1], 0>,
    296                                    InstrStage<1, [P8_DU2], 0>,
    297                                    InstrStage<1, [P8_DU3], 0>,
    298                                    InstrStage<1, [P8_DU4], 0>,
    299                                    InstrStage<1, [P8_DU5], 0>,
    300                                    InstrStage<1, [P8_DU6], 0>,
    301                                    InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
    302                                    InstrStage<1, [P8_LU1, P8_LU2]>],
    303                                   [1, 1, 1]>,
    304   InstrItinData<IIC_LdStSTWCX   , [InstrStage<1, [P8_DU1], 0>,
    305                                    InstrStage<1, [P8_DU2], 0>,
    306                                    InstrStage<1, [P8_DU3], 0>,
    307                                    InstrStage<1, [P8_DU4], 0>,
    308                                    InstrStage<1, [P8_DU5], 0>,
    309                                    InstrStage<1, [P8_DU6], 0>,
    310                                    InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
    311                                    InstrStage<1, [P8_LU1, P8_LU2]>],
    312                                   [1, 1, 1]>,
    313   InstrItinData<IIC_SprMFCR     , [InstrStage<1, [P8_DU1], 0>,
    314                                    InstrStage<1, [P8_CRU]>],
    315                                   [6, 1]>,
    316   InstrItinData<IIC_SprMFCRF    , [InstrStage<1, [P8_DU1], 0>,
    317                                    InstrStage<1, [P8_CRU]>],
    318                                   [3, 1]>,
    319   InstrItinData<IIC_SprMTSPR    , [InstrStage<1, [P8_DU1], 0>,
    320                                    InstrStage<1, [P8_FXU1, P8_FXU2]>],
    321                                   [4, 1]>, // mtctr
    322   InstrItinData<IIC_FPGeneral   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    323                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    324                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    325                                   [5, 1, 1]>,
    326   InstrItinData<IIC_FPAddSub    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    327                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    328                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    329                                   [5, 1, 1]>,
    330   InstrItinData<IIC_FPCompare   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    331                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    332                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    333                                   [8, 1, 1]>,
    334   InstrItinData<IIC_FPDivD      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    335                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    336                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    337                                   [33, 1, 1]>,
    338   InstrItinData<IIC_FPDivS      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    339                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    340                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    341                                   [27, 1, 1]>,
    342   InstrItinData<IIC_FPSqrtD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    343                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    344                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    345                                   [44, 1, 1]>,
    346   InstrItinData<IIC_FPSqrtS     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    347                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    348                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    349                                   [32, 1, 1]>,
    350   InstrItinData<IIC_FPFused     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    351                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    352                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    353                                   [5, 1, 1, 1]>,
    354   InstrItinData<IIC_FPRes       , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
    355                                                   P8_DU4, P8_DU5, P8_DU6], 0>,
    356                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    357                                   [5, 1, 1]>,
    358   InstrItinData<IIC_VecGeneral  , [InstrStage<1, [P8_DU1], 0>,
    359                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    360                                   [2, 1, 1]>,
    361   InstrItinData<IIC_VecVSL      , [InstrStage<1, [P8_DU1], 0>,
    362                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    363                                   [2, 1, 1]>,
    364   InstrItinData<IIC_VecVSR      , [InstrStage<1, [P8_DU1], 0>,
    365                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    366                                   [2, 1, 1]>,
    367   InstrItinData<IIC_VecFP       , [InstrStage<1, [P8_DU1], 0>,
    368                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    369                                   [6, 1, 1]>,
    370   InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
    371                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    372                                   [6, 1, 1]>,
    373   InstrItinData<IIC_VecFPRound  , [InstrStage<1, [P8_DU1], 0>,
    374                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    375                                   [6, 1, 1]>,
    376   InstrItinData<IIC_VecComplex  , [InstrStage<1, [P8_DU1], 0>,
    377                                    InstrStage<1, [P8_FPU1, P8_FPU2]>],
    378                                   [7, 1, 1]>,
    379   InstrItinData<IIC_VecPerm     , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
    380                                    InstrStage<1, [P8_FPU1, P8_FPU2]>], // either VS pipe: P8 VMX units are symmetric
    381                                   [3, 1, 1]>
    382 ]>;
    383 
    384 // ===---------------------------------------------------------------------===//
    385 // P8 machine model for scheduling and other instruction cost heuristics.
    386 // P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
    387 // to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
    388 
    389 def P8Model : SchedMachineModel { // POWER8 machine model; field values are set below.
    390   let IssueWidth = 8;  // up to 8 instructions dispatched per cycle.
    391                        // up to six non-branch instructions.
    392                        // up to two branches in a dispatch group.
    393 
    394   let MinLatency = 0;  // Out-of-order dispatch.
    395   let LoadLatency = 3; // Optimistic load latency assuming bypass.
    396                        // This is overridden by OperandCycles if the
    397                        // Itineraries are queried instead.
    398   let MispredictPenalty = 16; // NOTE(review): presumably in cycles - confirm
    399 
    400   // Try to make sure we have at least 10 dispatch groups in a loop.
    401   let LoopMicroOpBufferSize = 60; // presumably 10 groups x 6 non-branch slots - confirm
    402 
    403   let Itineraries = P8Itineraries; // the itinerary table defined earlier in this file
    404 }
    405 
    406