Home | History | Annotate | Download | only in ARM
      1 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the itinerary class data for the Swift processor.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 // ===---------------------------------------------------------------------===//
     15 // This section contains legacy support for itineraries. This is
     16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
     17 
     18 def SW_DIS0 : FuncUnit; // Listed in the leading InstrStage entries (third argument 0) of the itineraries below; presumably dispatch slot 0.
     19 def SW_DIS1 : FuncUnit; // Second unit of the same leading-stage group; presumably dispatch slot 1.
     20 def SW_DIS2 : FuncUnit; // Third unit of the same leading-stage group; presumably dispatch slot 2.
     21 
     22 def SW_ALU0 : FuncUnit; // ALU unit 0; the only execution unit listed for e.g. IIC_iMUL16/32/64 and the IIC_fpUNA*/fpCMP*/fpALU* entries below.
     23 def SW_ALU1 : FuncUnit; // ALU unit 1; the only execution unit listed for the IIC_fpCVT* and IIC_fpMUL32/64 entries below.
     24 def SW_LS   : FuncUnit; // Reserved by the IIC_iLoad_*, IIC_iStore_* and IIC_iPop* entries below (presumably the load/store unit).
     25 def SW_IDIV : FuncUnit; // Reserved for 14 cycles by the IIC_iDIV entry below.
     26 def SW_FDIV : FuncUnit; // Not referenced in the part of the table reviewed here; presumably the FP div/sqrt unit named in the FIXME list -- TODO confirm.
     27 
     28 // FIXME: Need bypasses.
     29 // FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
     30 //        IIC_iMOVix2ld better.
     31 // FIXME: Model the special immediate shifts that are not microcoded.
     32 // FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
     33 //        to issue on pipe 1?
     34 // FIXME: Model the pipelined behavior of CMP / TST instructions.
     35 // FIXME: Better model the microcode stages of multiply instructions, especially
     36 //        conditional variants.
     37 // FIXME: Add preload instruction when it is documented.
     38 // FIXME: Model non-pipelined nature of FP div / sqrt unit.
     39 
     40 def SwiftItineraries : ProcessorItineraries<
     41   [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
     42   //
     43   // Move instructions, unconditional
     44   InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     45                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     46                               [1]>,
     47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     48                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     49                               [1]>,
     50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     51                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     52                               [1]>,
     53   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     54                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     55                               [1]>,
     56   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     57                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     58                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     59                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     60                               [2]>,
     61   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     62                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
     63                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
     64                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
     65                                  [3]>,
     66   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     67                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     68                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     69                                InstrStage<1, [SW_LS]>],
     70                               [5]>,
     71   //
     72   // MVN instructions
     73   InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     74                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     75                               [1]>,
     76   InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     77                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     78                               [1]>,
     79   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     80                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     81                               [1]>,
     82   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     83                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     84                               [1]>,
     85   //
     86   // No operand cycles
     87   InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     88                                InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
     89   //
     90   // Binary Instructions that produce a result
     91   InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     92                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     93                             [1, 1]>,
     94   InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     95                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     96                             [1, 1, 1]>,
     97   InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     98                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     99                             [2, 1, 1]>,
    100   InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    101                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    102                             [2, 1, 1]>,
    103   InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    104                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    105                             [2, 1, 1, 1]>,
    106   //
    107   // Bitwise Instructions that produce a result
    108   InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    109                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    110                             [1, 1]>,
    111   InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    112                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    113                             [1, 1, 1]>,
    114   InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    115                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    116                             [2, 1, 1]>,
    117   InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    118                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    119                             [2, 1, 1, 1]>,
    120   //
    121   // Unary Instructions that produce a result
    122 
    123   // CLZ, RBIT, etc.
    124   InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    125                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    126                             [1, 1]>,
    127 
    128   // BFC, BFI, UBFX, SBFX
    129   InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    130                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    131                             [2, 1]>,
    132 
    133   //
    134   // Zero and sign extension instructions
    135   InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    136                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    137                             [1, 1]>,
    138   InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    139                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    140                             [1, 1, 1]>,
    141   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    142                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    143                             [1, 1, 1, 1]>,
    144   //
    145   // Compare instructions
    146   InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    147                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    148                               [1]>,
    149   InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    150                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    151                               [1, 1]>,
    152   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    153                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    154                               [1, 1]>,
    155   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    156                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    157                               [1, 1, 1]>,
    158   //
    159   // Test instructions
    160   InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    161                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    162                               [1]>,
    163   InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    164                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    165                               [1, 1]>,
    166   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    167                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    168                               [1, 1]>,
    169   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    170                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    171                               [1, 1, 1]>,
    172   //
    173   // Move instructions, conditional
    174   // FIXME: Correctly model the extra input dep on the destination.
    175   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    176                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    177                               [1]>,
    178   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    179                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    180                               [1, 1]>,
    181   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    182                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    183                               [1, 1]>,
    184   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    185                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    186                               [2, 1, 1]>,
    187   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    188                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    189                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
    190                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    191                               [2]>,
    192 
    193   // Integer multiply pipeline
    194   //
    195   InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    196                                InstrStage<1, [SW_ALU0]>],
    197                               [3, 1, 1]>,
    198   InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    199                                InstrStage<1, [SW_ALU0]>],
    200                               [3, 1, 1, 1]>,
    201   InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    202                                InstrStage<1, [SW_ALU0]>],
    203                               [4, 1, 1]>,
    204   InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    205                                InstrStage<1, [SW_ALU0]>],
    206                               [4, 1, 1, 1]>,
    207   InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
    208                                InstrStage<1, [SW_DIS1], 0>,
    209                                InstrStage<1, [SW_DIS2], 0>,
    210                                InstrStage<1, [SW_ALU0], 1>,
    211                                InstrStage<1, [SW_ALU0], 3>,
    212                                InstrStage<1, [SW_ALU0]>],
    213                               [5, 5, 1, 1]>,
    214   InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
    215                                InstrStage<1, [SW_DIS1], 0>,
    216                                InstrStage<1, [SW_DIS2], 0>,
    217                                InstrStage<1, [SW_ALU0], 1>,
    218                                InstrStage<1, [SW_ALU0], 1>,
    219                                InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
    220                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    221                               [5, 6, 1, 1]>,
    222   //
    223   // Integer divide
    224   InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    225                              InstrStage<1, [SW_ALU0], 0>,
    226                              InstrStage<14, [SW_IDIV]>],
    227                             [14, 1, 1]>,
    228 
    229   // Integer load pipeline
    230   // FIXME: The timings are some rough approximations
    231   //
    232   // Immediate offset
    233   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    234                                  InstrStage<1, [SW_LS]>],
    235                                 [3, 1]>,
    236   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    237                                  InstrStage<1, [SW_LS]>],
    238                                 [3, 1]>,
    239   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
    240                                  InstrStage<1, [SW_DIS1], 0>,
    241                                  InstrStage<1, [SW_LS], 1>,
    242                                  InstrStage<1, [SW_LS]>],
    243                                 [3, 4, 1]>,
    244   //
    245   // Register offset
    246   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    247                                  InstrStage<1, [SW_LS]>],
    248                                 [3, 1, 1]>,
    249   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    250                                  InstrStage<1, [SW_LS]>],
    251                                 [3, 1, 1]>,
    252   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
    253                                  InstrStage<1, [SW_DIS1], 0>,
    254                                  InstrStage<1, [SW_DIS2], 0>,
    255                                  InstrStage<1, [SW_LS], 1>,
    256                                  InstrStage<1, [SW_LS], 3>,
    257                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    258                                 [3, 4, 1, 1]>,
    259   //
    260   // Scaled register offset
    261   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
    262                                  InstrStage<1, [SW_DIS1], 0>,
    263                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    264                                  InstrStage<1, [SW_LS]>],
    265                                 [5, 1, 1]>,
    266   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
    267                                  InstrStage<1, [SW_DIS1], 0>,
    268                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    269                                  InstrStage<1, [SW_LS]>],
    270                                 [5, 1, 1]>,
    271   //
    272   // Immediate offset with update
    273   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
    274                                  InstrStage<1, [SW_DIS1], 0>,
    275                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    276                                  InstrStage<1, [SW_LS]>],
    277                                 [3, 1, 1]>,
    278   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
    279                                  InstrStage<1, [SW_DIS1], 0>,
    280                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    281                                  InstrStage<1, [SW_LS]>],
    282                                 [3, 1, 1]>,
    283   //
    284   // Register offset with update
    285   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
    286                                  InstrStage<1, [SW_DIS1], 0>,
    287                                  InstrStage<1, [SW_ALU0], 1>,
    288                                  InstrStage<1, [SW_LS]>],
    289                                 [3, 1, 1, 1]>,
    290   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
    291                                  InstrStage<1, [SW_DIS1], 0>,
    292                                  InstrStage<1, [SW_ALU0], 1>,
    293                                  InstrStage<1, [SW_LS]>],
    294                                 [3, 1, 1, 1]>,
    295   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
    296                                  InstrStage<1, [SW_DIS1], 0>,
    297                                  InstrStage<1, [SW_DIS2], 0>,
    298                                  InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
    299                                  InstrStage<1, [SW_LS], 3>,
    300                                  InstrStage<1, [SW_LS], 0>,
    301                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    302                                 [3, 4, 1, 1]>,
    303   //
    304   // Scaled register offset with update
    305   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
    306                                  InstrStage<1, [SW_DIS1], 0>,
    307                                  InstrStage<1, [SW_DIS2], 0>,
    308                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    309                                  InstrStage<1, [SW_LS], 3>,
    310                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    311                                 [5, 3, 1, 1]>,
    312   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
    313                                   InstrStage<1, [SW_DIS1], 0>,
    314                                   InstrStage<1, [SW_DIS2], 0>,
    315                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    316                                   InstrStage<1, [SW_LS], 0>,
    317                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
    318                                 [5, 3, 1, 1]>,
    319   //
    320   // Load multiple, def is the 5th operand.
    321   // FIXME: This assumes 3 to 4 registers.
    322   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
    323                                 InstrStage<1, [SW_DIS1], 0>,
    324                                 InstrStage<1, [SW_DIS2], 0>,
    325                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    326                                 InstrStage<1, [SW_LS]>],
    327                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
    328 
    329   //
    330   // Load multiple + update, defs are the 1st and 5th operands.
    331   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
    332                                 InstrStage<1, [SW_DIS1], 0>,
    333                                 InstrStage<1, [SW_DIS2], 0>,
    334                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
    335                                 InstrStage<1, [SW_LS], 3>,
    336                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    337                                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
    338   //
    339   // Load multiple plus branch
    340   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
    341                                 InstrStage<1, [SW_DIS1], 0>,
    342                                 InstrStage<1, [SW_DIS2], 0>,
    343                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    344                                 InstrStage<1, [SW_LS]>],
    345                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
    346   //
    347   // Pop, def is the 3rd operand.
    348   InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
    349                                 InstrStage<1, [SW_DIS1], 0>,
    350                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    351                                 InstrStage<1, [SW_LS]>],
    352                                [1, 1, 3], [], -1>, // dynamic uops
    353   //
    354   // Pop + branch, def is the 3rd operand.
    355   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
    356                                 InstrStage<1, [SW_DIS1], 0>,
    357                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    358                                 InstrStage<1, [SW_LS]>],
    359                                [1, 1, 3], [], -1>, // dynamic uops
    360 
    361   //
    362   // iLoadi + iALUr for t2LDRpci_pic.
    363   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    364                                 InstrStage<1, [SW_LS], 3>,
    365                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    366                                [4, 1]>,
    367 
    368   // Integer store pipeline
    369   //
    370   // Immediate offset
    371   InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    372                                  InstrStage<1, [SW_LS]>],
    373                                 [1, 1]>,
    374   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    375                                  InstrStage<1, [SW_LS]>],
    376                                 [1, 1]>,
    377   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
    378                                  InstrStage<1, [SW_DIS1], 0>,
    379                                  InstrStage<1, [SW_DIS2], 0>,
    380                                  InstrStage<1, [SW_LS], 0>,
    381                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    382                                  InstrStage<1, [SW_LS]>],
    383                                 [1, 1]>,
    384   //
    385   // Register offset
    386   InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    387                                  InstrStage<1, [SW_LS]>],
    388                                 [1, 1, 1]>,
    389   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    390                                  InstrStage<1, [SW_LS]>],
    391                                 [1, 1, 1]>,
    392   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
    393                                  InstrStage<1, [SW_DIS1], 0>,
    394                                  InstrStage<1, [SW_DIS2], 0>,
    395                                  InstrStage<1, [SW_LS], 0>,
    396                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    397                                  InstrStage<1, [SW_LS]>],
    398                                 [1, 1, 1]>,
    399   //
    400   // Scaled register offset
    401   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
    402                                   InstrStage<1, [SW_DIS1], 0>,
    403                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    404                                   InstrStage<1, [SW_LS]>],
    405                                  [1, 1, 1]>,
    406   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
    407                                   InstrStage<1, [SW_DIS1], 0>,
    408                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    409                                   InstrStage<1, [SW_LS]>],
    410                                  [1, 1, 1]>,
    411   //
    412   // Immediate offset with update
    413   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
    414                                   InstrStage<1, [SW_DIS1], 0>,
    415                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    416                                   InstrStage<1, [SW_LS]>],
    417                                  [1, 1, 1]>,
    418   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
    419                                   InstrStage<1, [SW_DIS1], 0>,
    420                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    421                                   InstrStage<1, [SW_LS]>],
    422                                  [1, 1, 1]>,
    423   //
    424   // Register offset with update
    425   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
    426                                   InstrStage<1, [SW_DIS1], 0>,
    427                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    428                                   InstrStage<1, [SW_LS]>],
    429                                  [1, 1, 1, 1]>,
    430   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
    431                                   InstrStage<1, [SW_DIS1], 0>,
    432                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    433                                   InstrStage<1, [SW_LS]>],
    434                                  [1, 1, 1, 1]>,
    435   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
    436                                   InstrStage<1, [SW_DIS1], 0>,
    437                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    438                                   InstrStage<1, [SW_LS]>],
    439                                  [1, 1, 1, 1]>,
    440   //
    441   // Scaled register offset with update
    442   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
    443                                     InstrStage<1, [SW_DIS1], 0>,
    444                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    445                                     InstrStage<1, [SW_LS], 0>,
    446                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
    447                                    [3, 1, 1, 1]>,
    448   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
    449                                     InstrStage<1, [SW_DIS1], 0>,
    450                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    451                                     InstrStage<1, [SW_LS], 0>,
    452                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
    453                                    [3, 1, 1, 1]>,
    454   //
    455   // Store multiple
    456   InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
    457                                 InstrStage<1, [SW_DIS1], 0>,
    458                                 InstrStage<1, [SW_DIS2], 0>,
    459                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    460                                 InstrStage<1, [SW_LS], 1>,
    461                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    462                                 InstrStage<1, [SW_LS], 1>,
    463                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    464                                 InstrStage<1, [SW_LS]>],
    465                                 [], [], -1>, // dynamic uops
    466   //
    467   // Store multiple + update
    468   InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
    469                                 InstrStage<1, [SW_DIS1], 0>,
    470                                 InstrStage<1, [SW_DIS2], 0>,
    471                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    472                                 InstrStage<1, [SW_LS], 1>,
    473                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    474                                 InstrStage<1, [SW_LS], 1>,
    475                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    476                                 InstrStage<1, [SW_LS]>],
    477                                [2], [], -1>, // dynamic uops
    478 
    479   //
    480   // Preload
    481   InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
    482 
    483   // Branch
    484   //
    485   // no delay slots, so the latency of a branch is unimportant
    486   InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
    487 
    488   // FP Special Register to Integer Register File Move
    489   InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    490                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
    491                              [1]>,
    492   //
    493   // Single-precision FP Unary
    494   //
    495   // Most floating-point moves get issued on ALU0.
    496   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    497                                InstrStage<1, [SW_ALU0]>],
    498                               [2, 1]>,
    499   //
    500   // Double-precision FP Unary
    501   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    502                                InstrStage<1, [SW_ALU0]>],
    503                               [2, 1]>,
    504 
    505   //
    506   // Single-precision FP Compare
    507   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    508                                InstrStage<1, [SW_ALU0]>],
    509                               [1, 1]>,
    510   //
    511   // Double-precision FP Compare
    512   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    513                                InstrStage<1, [SW_ALU0]>],
    514                               [1, 1]>,
    515   //
    516   // Single to Double FP Convert
    517   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    518                                InstrStage<1, [SW_ALU1]>],
    519                               [4, 1]>,
    520   //
    521   // Double to Single FP Convert
    522   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    523                                InstrStage<1, [SW_ALU1]>],
    524                               [4, 1]>,
    525 
    526   //
    527   // Single to Half FP Convert
    528   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
    529                                InstrStage<1, [SW_DIS1], 0>,
    530                                InstrStage<1, [SW_ALU1], 4>,
    531                                InstrStage<1, [SW_ALU1]>],
    532                               [6, 1]>,
    533   //
    534   // Half to Single FP Convert
    535   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    536                                InstrStage<1, [SW_ALU1]>],
    537                               [4, 1]>,
    538 
    539   //
    540   // Single-Precision FP to Integer Convert
    541   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    542                                InstrStage<1, [SW_ALU1]>],
    543                               [4, 1]>,
    544   //
    545   // Double-Precision FP to Integer Convert
    546   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    547                                InstrStage<1, [SW_ALU1]>],
    548                               [4, 1]>,
    549   //
    550   // Integer to Single-Precision FP Convert
    551   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    552                                InstrStage<1, [SW_ALU1]>],
    553                               [4, 1]>,
    554   //
    555   // Integer to Double-Precision FP Convert
    556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    557                                InstrStage<1, [SW_ALU1]>],
    558                               [4, 1]>,
    559   //
    560   // Single-precision FP ALU
    561   InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    562                                InstrStage<1, [SW_ALU0]>],
    563                               [2, 1, 1]>,
    564   //
    565   // Double-precision FP ALU
    566   InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    567                                InstrStage<1, [SW_ALU0]>],
    568                               [2, 1, 1]>,
    569   //
    570   // Single-precision FP Multiply
    571   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    572                                InstrStage<1, [SW_ALU1]>],
    573                               [4, 1, 1]>,
    574   //
    575   // Double-precision FP Multiply
    576   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    577                                InstrStage<1, [SW_ALU1]>],
    578                               [6, 1, 1]>,
    579   //
    580   // Single-precision FP MAC
    581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    582                                InstrStage<1, [SW_ALU1]>],
    583                               [8, 1, 1]>,
    584   //
    585   // Double-precision FP MAC
    586   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    587                                InstrStage<1, [SW_ALU1]>],
    588                               [12, 1, 1]>,
    589   //
    590   // Single-precision Fused FP MAC
    591   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    592                                InstrStage<1, [SW_ALU1]>],
    593                               [8, 1, 1]>,
    594   //
    595   // Double-precision Fused FP MAC
    596   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    597                                InstrStage<1, [SW_ALU1]>],
    598                               [12, 1, 1]>,
    599   //
    600   // Single-precision FP DIV
    601   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    602                                InstrStage<1, [SW_ALU1], 0>,
    603                                InstrStage<15, [SW_FDIV]>],
    604                               [17, 1, 1]>,
    605   //
    606   // Double-precision FP DIV
    607   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    608                                InstrStage<1, [SW_ALU1], 0>,
    609                                InstrStage<30, [SW_FDIV]>],
    610                               [32, 1, 1]>,
    611   //
    612   // Single-precision FP SQRT
    613   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    614                                InstrStage<1, [SW_ALU1], 0>,
    615                                InstrStage<15, [SW_FDIV]>],
    616                               [17, 1]>,
    617   //
    618   // Double-precision FP SQRT
    619   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    620                                InstrStage<1, [SW_ALU1], 0>,
    621                                InstrStage<30, [SW_FDIV]>],
    622                               [32, 1, 1]>,
    623 
    624   //
    625   // Integer to Single-precision Move
    626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
    627                                InstrStage<1, [SW_DIS1], 0>,
    628                                InstrStage<1, [SW_LS], 4>,
    629                                InstrStage<1, [SW_ALU0]>],
    630                               [6, 1]>,
    631   //
    632   // Integer to Double-precision Move
    633   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    634                                InstrStage<1, [SW_LS]>],
    635                               [4, 1]>,
    636   //
    637   // Single-precision to Integer Move
    638   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    639                                InstrStage<1, [SW_LS]>],
    640                               [3, 1]>,
    641   //
    642   // Double-precision to Integer Move
    643   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
    644                                InstrStage<1, [SW_DIS1], 0>,
    645                                InstrStage<1, [SW_LS], 3>,
    646                                InstrStage<1, [SW_LS]>],
    647                               [3, 4, 1]>,
    648   //
    649   // Single-precision FP Load
    650   InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    651                                InstrStage<1, [SW_LS]>],
    652                               [4, 1]>,
    653   //
    654   // Double-precision FP Load
    655   InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    656                                InstrStage<1, [SW_LS]>],
    657                               [4, 1]>,
    658   //
    659   // FP Load Multiple
    660   // FIXME: Assumes a single Q register.
    661   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    662                                InstrStage<1, [SW_LS]>],
    663                               [1, 1, 1, 4], [], -1>, // dynamic uops
    664   //
    665   // FP Load Multiple + update
    666   // FIXME: Assumes a single Q register.
    667   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
    668                                InstrStage<1, [SW_DIS1], 0>,
    669                                InstrStage<1, [SW_LS], 4>,
    670                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    671                               [2, 1, 1, 1, 4], [], -1>, // dynamic uops
    672   //
    673   // Single-precision FP Store
    674   InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    675                                InstrStage<1, [SW_LS]>],
    676                               [1, 1]>,
    677   //
    678   // Double-precision FP Store
    679   InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    680                                InstrStage<1, [SW_LS]>],
    681                               [1, 1]>,
    682   //
    683   // FP Store Multiple
    684   // FIXME: Assumes a single Q register.
    685   InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    686                                InstrStage<1, [SW_LS]>],
    687                               [1, 1, 1], [], -1>, // dynamic uops
    688   //
    689   // FP Store Multiple + update
    690   // FIXME: Assumes a single Q register.
    691   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
    692                                 InstrStage<1, [SW_DIS1], 0>,
    693                                 InstrStage<1, [SW_LS], 4>,
    694                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    695                                [2, 1, 1, 1], [], -1>, // dynamic uops
    696   // NEON
    697   //
    698   // Double-register Integer Unary
    699   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    700                                InstrStage<1, [SW_ALU0]>],
    701                               [4, 1]>,
    702   //
    703   // Quad-register Integer Unary
    704   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    705                                InstrStage<1, [SW_ALU0]>],
    706                               [4, 1]>,
    707   //
    708   // Double-register Integer Q-Unary
    709   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    710                                InstrStage<1, [SW_ALU0]>],
    711                               [4, 1]>,
    712   //
    713   // Quad-register Integer Q-Unary
    714   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    715                                InstrStage<1, [SW_ALU0]>],
    716                               [4, 1]>,
    717   //
    718   // Double-register Integer Binary
    719   InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    720                                InstrStage<1, [SW_ALU0]>],
    721                               [2, 1, 1]>,
    722   //
    723   // Quad-register Integer Binary
    724   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    725                                InstrStage<1, [SW_ALU0]>],
    726                               [2, 1, 1]>,
    727   //
    728   // Double-register Integer Subtract
    729   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    730                                InstrStage<1, [SW_ALU0]>],
    731                               [2, 1, 1]>,
    732   //
    733   // Quad-register Integer Subtract
    734   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    735                                InstrStage<1, [SW_ALU0]>],
    736                               [2, 1, 1]>,
    737   //
    738   // Double-register Integer Shift
    739   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    740                                InstrStage<1, [SW_ALU0]>],
    741                               [2, 1, 1]>,
    742   //
    743   // Quad-register Integer Shift
    744   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    745                                InstrStage<1, [SW_ALU0]>],
    746                               [2, 1, 1]>,
    747   //
    748   // Double-register Integer Shift (4 cycle)
    749   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    750                                InstrStage<1, [SW_ALU0]>],
    751                               [4, 1, 1]>,
    752   //
    753   // Quad-register Integer Shift (4 cycle)
    754   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    755                                InstrStage<1, [SW_ALU0]>],
    756                               [4, 1, 1]>,
    757   //
    758   // Double-register Integer Binary (4 cycle)
    759   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    760                                InstrStage<1, [SW_ALU0]>],
    761                               [4, 1, 1]>,
    762   //
    763   // Quad-register Integer Binary (4 cycle)
    764   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    765                                InstrStage<1, [SW_ALU0]>],
    766                               [4, 1, 1]>,
    767   //
    768   // Double-register Integer Subtract (4 cycle)
    769   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    770                                InstrStage<1, [SW_ALU0]>],
    771                               [4, 1, 1]>,
    772   //
    773   // Quad-register Integer Subtract (4 cycle)
    774   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    775                                InstrStage<1, [SW_ALU0]>],
    776                               [4, 1, 1]>,
    777 
    778   //
    779   // Double-register Integer Count
    780   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    781                                InstrStage<1, [SW_ALU0]>],
    782                               [2, 1, 1]>,
    783   //
    784   // Quad-register Integer Count
    785   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    786                                InstrStage<1, [SW_ALU0]>],
    787                               [2, 1, 1]>,
    788   //
    789   // Double-register Absolute Difference and Accumulate
    790   InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    791                                InstrStage<1, [SW_ALU0]>],
    792                               [4, 1, 1, 1]>,
    793   //
    794   // Quad-register Absolute Difference and Accumulate
    795   InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    796                                InstrStage<1, [SW_ALU0]>],
    797                               [4, 1, 1, 1]>,
    798   //
    799   // Double-register Integer Pair Add Long
    800   InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    801                                InstrStage<1, [SW_ALU0]>],
    802                               [4, 1, 1]>,
    803   //
    804   // Quad-register Integer Pair Add Long
    805   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    806                                InstrStage<1, [SW_ALU0]>],
    807                               [4, 1, 1]>,
    808 
    809   //
    810   // Double-register Integer Multiply (.8, .16)
    811   InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    812                                InstrStage<1, [SW_ALU1]>],
    813                               [4, 1, 1]>,
    814   //
    815   // Quad-register Integer Multiply (.8, .16)
    816   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    817                                InstrStage<1, [SW_ALU1]>],
    818                               [4, 1, 1]>,
    819 
    820   //
    821   // Double-register Integer Multiply (.32)
    822   InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    823                                InstrStage<1, [SW_ALU1]>],
    824                               [4, 1, 1]>,
    825   //
    826   // Quad-register Integer Multiply (.32)
    827   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    828                                InstrStage<1, [SW_ALU1]>],
    829                               [4, 1, 1]>,
    830   //
    831   // Double-register Integer Multiply-Accumulate (.8, .16)
    832   InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    833                                InstrStage<1, [SW_ALU1]>],
    834                               [4, 1, 1, 1]>,
    835   //
    836   // Double-register Integer Multiply-Accumulate (.32)
    837   InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    838                                InstrStage<1, [SW_ALU1]>],
    839                               [4, 1, 1, 1]>,
    840   //
    841   // Quad-register Integer Multiply-Accumulate (.8, .16)
    842   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    843                                InstrStage<1, [SW_ALU1]>],
    844                               [4, 1, 1, 1]>,
    845   //
    846   // Quad-register Integer Multiply-Accumulate (.32)
    847   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    848                                InstrStage<1, [SW_ALU1]>],
    849                               [4, 1, 1, 1]>,
    850 
    851   //
    852   // Move
    853   InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    854                                InstrStage<1, [SW_ALU0]>],
    855                               [2, 1]>,
    856   //
    857   // Move Immediate
    858   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    859                                InstrStage<1, [SW_ALU0]>],
    860                               [2]>,
    861   //
    862   // Double-register Permute Move
    863   InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    864                                InstrStage<1, [SW_ALU1]>],
    865                               [2, 1]>,
    866   //
    867   // Quad-register Permute Move
    868   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    869                                InstrStage<1, [SW_ALU1]>],
    870                               [2, 1]>,
    871   //
    872   // Integer to Single-precision Move
    873   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
    874                                InstrStage<1, [SW_DIS1], 0>,
    875                                InstrStage<1, [SW_LS], 4>,
    876                                InstrStage<1, [SW_ALU0]>],
    877                               [6, 1]>,
    878   //
    879   // Integer to Double-precision Move
    880   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    881                                InstrStage<1, [SW_LS]>],
    882                               [4, 1, 1]>,
    883   //
    884   // Single-precision to Integer Move
    885   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    886                                InstrStage<1, [SW_LS]>],
    887                               [3, 1]>,
    888   //
    889   // Double-precision to Integer Move
    890   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
    891                                InstrStage<1, [SW_DIS1], 0>,
    892                                InstrStage<1, [SW_LS], 3>,
    893                                InstrStage<1, [SW_LS]>],
    894                               [3, 4, 1]>,
    895   //
    896   // Integer to Lane Move
    897   // FIXME: I think this is correct, but it is not clear from the tuning guide.
    898   InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
    899                                InstrStage<1, [SW_DIS1], 0>,
    900                                InstrStage<1, [SW_LS], 4>,
    901                                InstrStage<1, [SW_ALU0]>],
    902                               [6, 1]>,
    903 
    904   //
    905   // Vector narrow move
    906   InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    907                                InstrStage<1, [SW_ALU1]>],
    908                               [2, 1]>,
    909   //
    910   // Double-register FP Unary
    911   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
    912   //        and they issue on a different pipeline.
    913   InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    914                                InstrStage<1, [SW_ALU0]>],
    915                               [2, 1]>,
    916   //
    917   // Quad-register FP Unary
    918   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
    919   //        and they issue on a different pipeline.
    920   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    921                                InstrStage<1, [SW_ALU0]>],
    922                               [2, 1]>,
    923   //
    924   // Double-register FP Binary
    925   // FIXME: We're using this itin for many instructions.
    926   InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    927                                InstrStage<1, [SW_ALU0]>],
    928                               [4, 1, 1]>,
    929 
    930   //
    931   // VPADD, etc.
    932   InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    933                                InstrStage<1, [SW_ALU0]>],
    934                               [4, 1, 1]>,
    935   //
    936   // Double-register FP VMUL
    937   InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    938                                InstrStage<1, [SW_ALU1]>],
    939                               [4, 1, 1]>,
    940   //
    941   // Quad-register FP Binary
    942   InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    943                                InstrStage<1, [SW_ALU0]>],
    944                               [4, 1, 1]>,
    945   //
    946   // Quad-register FP VMUL
    947   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    948                                InstrStage<1, [SW_ALU1]>],
    949                               [4, 1, 1]>,
    950   //
    951   // Double-register FP Multiply-Accumulate
    952   InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    953                                InstrStage<1, [SW_ALU1]>],
    954                               [8, 1, 1]>,
    955   //
    956   // Quad-register FP Multiply-Accumulate
    957   InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    958                                InstrStage<1, [SW_ALU1]>],
    959                               [8, 1, 1]>,
    960   //
    961   // Double-register Fused FP Multiply-Accumulate
    962   InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    963                                InstrStage<1, [SW_ALU1]>],
    964                               [8, 1, 1]>,
    965   //
    966   // Quad-register Fused FP Multiply-Accumulate
    967   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    968                                InstrStage<1, [SW_ALU1]>],
    969                               [8, 1, 1]>,
    970   //
    971   // Double-register Reciprocal Step
    972   InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    973                                InstrStage<1, [SW_ALU1]>],
    974                               [8, 1, 1]>,
    975   //
    976   // Quad-register Reciprocal Step
    977   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    978                                InstrStage<1, [SW_ALU1]>],
    979                               [8, 1, 1]>,
    980   //
    981   // Double-register Permute
    982   // FIXME: The latencies are unclear from the documentation.
    983   InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
    984                                InstrStage<1, [SW_DIS1], 0>,
    985                                InstrStage<1, [SW_DIS2], 0>,
    986                                InstrStage<1, [SW_ALU1], 2>,
    987                                InstrStage<1, [SW_ALU1], 2>,
    988                                InstrStage<1, [SW_ALU1]>],
    989                               [3, 4, 3, 4]>,
    990   //
    991   // Quad-register Permute
    992   // FIXME: The latencies are unclear from the documentation.
    993   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
    994                                InstrStage<1, [SW_DIS1], 0>,
    995                                InstrStage<1, [SW_DIS2], 0>,
    996                                InstrStage<1, [SW_ALU1], 2>,
    997                                InstrStage<1, [SW_ALU1], 2>,
    998                                InstrStage<1, [SW_ALU1]>],
    999                               [3, 4, 3, 4]>,
   1000   //
   1001   // Quad-register Permute (3 cycle issue on A9)
   1002   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
   1003                                InstrStage<1, [SW_DIS1], 0>,
   1004                                InstrStage<1, [SW_DIS2], 0>,
   1005                                InstrStage<1, [SW_ALU1], 2>,
   1006                                InstrStage<1, [SW_ALU1], 2>,
   1007                                InstrStage<1, [SW_ALU1]>],
   1008                               [3, 4, 3, 4]>,
   1009 
   1010   //
   1011   // Double-register VEXT
   1012   InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1013                                InstrStage<1, [SW_ALU1]>],
   1014                               [2, 1, 1]>,
   1015   //
   1016   // Quad-register VEXT
   1017   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1018                                InstrStage<1, [SW_ALU1]>],
   1019                               [2, 1, 1]>,
   1020   //
   1021   // VTB
   1022   InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1023                                InstrStage<1, [SW_ALU1]>],
   1024                               [2, 1, 1]>,
   1025   InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
   1026                                InstrStage<1, [SW_DIS1], 0>,
   1027                                InstrStage<1, [SW_ALU1], 2>,
   1028                                InstrStage<1, [SW_ALU1]>],
   1029                               [4, 1, 3, 3]>,
   1030   InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
   1031                                InstrStage<1, [SW_DIS1], 0>,
   1032                                InstrStage<1, [SW_DIS2], 0>,
   1033                                InstrStage<1, [SW_ALU1], 2>,
   1034                                InstrStage<1, [SW_ALU1], 2>,
   1035                                InstrStage<1, [SW_ALU1]>],
   1036                               [6, 1, 3, 5, 5]>,
   1037   InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
   1038                                InstrStage<1, [SW_DIS1], 0>,
   1039                                InstrStage<1, [SW_DIS2], 0>,
   1040                                InstrStage<1, [SW_ALU1], 2>,
   1041                                InstrStage<1, [SW_ALU1], 2>,
   1042                                InstrStage<1, [SW_ALU1], 2>,
   1043                                InstrStage<1, [SW_ALU1]>],
   1044                               [8, 1, 3, 5, 7, 7]>,
   1045   //
   1046   // VTBX
   1047   InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1048                                InstrStage<1, [SW_ALU1]>],
   1049                               [2, 1, 1]>,
   1050   InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
   1051                                InstrStage<1, [SW_DIS1], 0>,
   1052                                InstrStage<1, [SW_ALU1], 2>,
   1053                                InstrStage<1, [SW_ALU1]>],
   1054                               [4, 1, 3, 3]>,
   1055   InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
   1056                                InstrStage<1, [SW_DIS1], 0>,
   1057                                InstrStage<1, [SW_DIS2], 0>,
   1058                                InstrStage<1, [SW_ALU1], 2>,
   1059                                InstrStage<1, [SW_ALU1], 2>,
   1060                                InstrStage<1, [SW_ALU1]>],
   1061                               [6, 1, 3, 5, 5]>,
   1062   InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
   1063                                InstrStage<1, [SW_DIS1], 0>,
   1064                                InstrStage<1, [SW_DIS2], 0>,
   1065                                InstrStage<1, [SW_ALU1], 2>,
   1066                                InstrStage<1, [SW_ALU1], 2>,
   1067                                InstrStage<1, [SW_ALU1], 2>,
   1068                                InstrStage<1, [SW_ALU1]>],
   1069                               [8, 1, 3, 5, 7, 7]>
   1070 ]>;
   1071 
   1072 // ===---------------------------------------------------------------------===//
   1073 // The following definitions describe the simple machine model which
   1074 // will replace itineraries.
   1075 
   1076 // Swift machine model for scheduling and other instruction cost heuristics.
   1077 def SwiftModel : SchedMachineModel {
   1078   let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
   1079   let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
   1080   let LoadLatency = 3; // NOTE(review): presumably in cycles -- confirm.
   1081 
   1082   let Itineraries = SwiftItineraries; // Legacy itinerary data defined above.
   1083 }
   1084 
   1085 // TODO: Add Swift processor and scheduler resources.
   1086