Home | History | Annotate | Download | only in ARM
      1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 // ===---------------------------------------------------------------------===//
     15 // This section contains legacy support for itineraries. This is
     16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
     17 
     18 //
      19 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
      20 // Technical Reference Manual".
     21 //
      22 // Functional units (named in the InstrStage lists of the itineraries below)
      23 def A9_Issue0  : FuncUnit; // Issue unit 0
      24 def A9_Issue1  : FuncUnit; // Issue unit 1
      25 def A9_Branch  : FuncUnit; // Branch unit
      26 def A9_ALU0    : FuncUnit; // ALU pipeline 0; integer multiplies use only this one
      27 def A9_ALU1    : FuncUnit; // ALU pipeline 1
      28 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
      29 def A9_NPipe   : FuncUnit; // NEON pipeline
      30 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
      31 def A9_LSUnit  : FuncUnit; // Load/store unit
      32 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU)
      33 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial FU)
      34 
      35 // Bypasses (listed in the bypass arrays of the itineraries below)
      36 def A9_LdBypass : Bypass;
     37 
     38 def CortexA9Itineraries : ProcessorItineraries<
     39   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
     40    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
     41   [A9_LdBypass], [
     42   // Two fully-pipelined integer ALU pipelines
     43 
     44   //
     45   // Move instructions, unconditional
     46   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
     48   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     49                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
     50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     51                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
     52   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     53                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
     54   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     55                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
     56                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
     57   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     58                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
     59                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
     60                                   InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
     61   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     62                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
     63                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
     64                                InstrStage<1, [A9_MUX0], 0>,
     65                                InstrStage<1, [A9_AGU], 0>,
     66                                InstrStage<1, [A9_LSUnit]>], [5]>,
     67   //
     68   // MVN instructions
     69   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     70                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
     71                               [1]>,
     72   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     73                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
     74                               [1, 1], [NoBypass, A9_LdBypass]>,
     75   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     76                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
     77                               [2, 1]>,
     78   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     79                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
     80                               [3, 1, 1]>,
     81   //
     82   // No operand cycles
     83   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     84                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
     85   //
     86   // Binary Instructions that produce a result
     87   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     88                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
     89                             [1, 1], [NoBypass, A9_LdBypass]>,
     90   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     91                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
     92                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
     93   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     94                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
     95                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
     96   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
     97                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
     98                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
     99   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    100                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
    101                             [3, 1, 1, 1],
    102                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
    103   //
    104   // Bitwise Instructions that produce a result
    105   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    106                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
    107   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    108                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
    109   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    110                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
    111   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    112                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
    113   //
    114   // Unary Instructions that produce a result
    115 
    116   // CLZ, RBIT, etc.
    117   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    118                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
    119 
    120   // BFC, BFI, UBFX, SBFX
    121   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    122                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
    123 
    124   //
    125   // Zero and sign extension instructions
    126   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    127                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
    128   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    129                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
    130   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    131                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
    132   //
    133   // Compare instructions
    134   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    135                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
    136                                [1], [A9_LdBypass]>,
    137   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    138                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
    139                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
    140   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    141                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
    142                                 [1, 1], [A9_LdBypass, NoBypass]>,
    143   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    144                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
    145                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
    146   //
    147   // Test instructions
    148   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    149                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
    150   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    151                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
    152   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    153                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
    154   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    155                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
    156   //
    157   // Move instructions, conditional
    158   // FIXME: Correctly model the extra input dep on the destination.
    159   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    160                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
    161   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    162                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
    163   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    164                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
    165   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    166                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
    167   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    168                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
    169                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    170                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
    171 
    172   // Integer multiply pipeline
    173   //
    174   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    175                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
    176   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    177                                InstrStage<2, [A9_ALU0]>],
    178                               [3, 1, 1, 1]>,
    179   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    180                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
    181   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    182                                InstrStage<2, [A9_ALU0]>],
    183                               [4, 1, 1, 1]>,
    184   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    185                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
    186   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    187                                InstrStage<3, [A9_ALU0]>],
    188                               [4, 5, 1, 1]>,
    189   // Integer load pipeline
     190   // FIXME: The timings are rough approximations.
    191   //
    192   // Immediate offset
    193   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    194                                  InstrStage<1, [A9_MUX0], 0>,
    195                                  InstrStage<1, [A9_AGU], 0>,
    196                                  InstrStage<1, [A9_LSUnit]>],
    197                                 [3, 1], [A9_LdBypass]>,
    198   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    199                                  InstrStage<1, [A9_MUX0], 0>,
    200                                  InstrStage<2, [A9_AGU], 0>,
    201                                  InstrStage<1, [A9_LSUnit]>],
    202                                 [4, 1], [A9_LdBypass]>,
     203   // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
    204   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    205                                  InstrStage<1, [A9_MUX0], 0>,
    206                                  InstrStage<2, [A9_AGU], 0>,
    207                                  InstrStage<1, [A9_LSUnit]>],
    208                                 [3, 3, 1], [A9_LdBypass]>,
    209   //
    210   // Register offset
    211   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    212                                  InstrStage<1, [A9_MUX0], 0>,
    213                                  InstrStage<1, [A9_AGU], 0>,
    214                                  InstrStage<1, [A9_LSUnit]>],
    215                                 [3, 1, 1], [A9_LdBypass]>,
    216   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    217                                  InstrStage<1, [A9_MUX0], 0>,
    218                                  InstrStage<2, [A9_AGU], 0>,
    219                                  InstrStage<1, [A9_LSUnit]>],
    220                                 [4, 1, 1], [A9_LdBypass]>,
    221   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    222                                  InstrStage<1, [A9_MUX0], 0>,
    223                                  InstrStage<2, [A9_AGU], 0>,
    224                                  InstrStage<1, [A9_LSUnit]>],
    225                                 [3, 3, 1, 1], [A9_LdBypass]>,
    226   //
    227   // Scaled register offset
    228   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    229                                  InstrStage<1, [A9_MUX0], 0>,
    230                                  InstrStage<1, [A9_AGU], 0>,
    231                                  InstrStage<1, [A9_LSUnit], 0>],
    232                                 [4, 1, 1], [A9_LdBypass]>,
    233   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    234                                  InstrStage<1, [A9_MUX0], 0>,
    235                                  InstrStage<2, [A9_AGU], 0>,
    236                                  InstrStage<1, [A9_LSUnit]>],
    237                                 [5, 1, 1], [A9_LdBypass]>,
    238   //
    239   // Immediate offset with update
    240   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    241                                  InstrStage<1, [A9_MUX0], 0>,
    242                                  InstrStage<1, [A9_AGU], 0>,
    243                                  InstrStage<1, [A9_LSUnit]>],
    244                                 [3, 2, 1], [A9_LdBypass]>,
    245   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    246                                  InstrStage<1, [A9_MUX0], 0>,
    247                                  InstrStage<2, [A9_AGU], 0>,
    248                                  InstrStage<1, [A9_LSUnit]>],
    249                                 [4, 3, 1], [A9_LdBypass]>,
    250   //
    251   // Register offset with update
    252   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    253                                  InstrStage<1, [A9_MUX0], 0>,
    254                                  InstrStage<1, [A9_AGU], 0>,
    255                                  InstrStage<1, [A9_LSUnit]>],
    256                                 [3, 2, 1, 1], [A9_LdBypass]>,
    257   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    258                                  InstrStage<1, [A9_MUX0], 0>,
    259                                  InstrStage<2, [A9_AGU], 0>,
    260                                  InstrStage<1, [A9_LSUnit]>],
    261                                 [4, 3, 1, 1], [A9_LdBypass]>,
    262   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    263                                  InstrStage<1, [A9_MUX0], 0>,
    264                                  InstrStage<2, [A9_AGU], 0>,
    265                                  InstrStage<1, [A9_LSUnit]>],
    266                                 [3, 3, 1, 1], [A9_LdBypass]>,
    267   //
    268   // Scaled register offset with update
    269   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    270                                  InstrStage<1, [A9_MUX0], 0>,
    271                                  InstrStage<1, [A9_AGU], 0>,
    272                                  InstrStage<1, [A9_LSUnit]>],
    273                                 [4, 3, 1, 1], [A9_LdBypass]>,
    274   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    275                                   InstrStage<1, [A9_MUX0], 0>,
    276                                   InstrStage<2, [A9_AGU], 0>,
    277                                   InstrStage<1, [A9_LSUnit]>],
    278                                  [5, 4, 1, 1], [A9_LdBypass]>,
    279   //
    280   // Load multiple, def is the 5th operand.
    281   // FIXME: This assumes 3 to 4 registers.
    282   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    283                                 InstrStage<1, [A9_MUX0], 0>,
    284                                 InstrStage<2, [A9_AGU], 1>,
    285                                 InstrStage<2, [A9_LSUnit]>],
    286                                [1, 1, 1, 1, 3],
    287                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
    288                          -1>, // dynamic uops
    289   //
    290   // Load multiple + update, defs are the 1st and 5th operands.
    291   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    292                                 InstrStage<1, [A9_MUX0], 0>,
    293                                 InstrStage<2, [A9_AGU], 1>,
    294                                 InstrStage<2, [A9_LSUnit]>],
    295                                [2, 1, 1, 1, 3],
    296                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
    297                          -1>, // dynamic uops
    298   //
    299   // Load multiple plus branch
    300   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    301                                 InstrStage<1, [A9_MUX0], 0>,
    302                                 InstrStage<1, [A9_AGU], 1>,
    303                                 InstrStage<2, [A9_LSUnit]>,
    304                                 InstrStage<1, [A9_Branch]>],
    305                                [1, 2, 1, 1, 3],
    306                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
    307                          -1>, // dynamic uops
    308   //
    309   // Pop, def is the 3rd operand.
    310   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    311                                 InstrStage<1, [A9_MUX0], 0>,
    312                                 InstrStage<2, [A9_AGU], 1>,
    313                                 InstrStage<2, [A9_LSUnit]>],
    314                                [1, 1, 3],
    315                                [NoBypass, NoBypass, A9_LdBypass],
    316                                -1>, // dynamic uops
    317   //
    318   // Pop + branch, def is the 3rd operand.
    319   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    320                                 InstrStage<1, [A9_MUX0], 0>,
    321                                 InstrStage<2, [A9_AGU], 1>,
    322                                 InstrStage<2, [A9_LSUnit]>,
    323                                 InstrStage<1, [A9_Branch]>],
    324                                [1, 1, 3],
    325                                [NoBypass, NoBypass, A9_LdBypass],
    326                                -1>, // dynamic uops
    327   //
    328   // iLoadi + iALUr for t2LDRpci_pic.
    329   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    330                                 InstrStage<1, [A9_MUX0], 0>,
    331                                 InstrStage<1, [A9_AGU], 0>,
    332                                 InstrStage<1, [A9_LSUnit]>,
    333                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
    334                                [2, 1]>,
    335 
    336   // Integer store pipeline
     337   //
    338   // Immediate offset
    339   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    340                                  InstrStage<1, [A9_MUX0], 0>,
    341                                  InstrStage<1, [A9_AGU], 0>,
    342                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
    343   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    344                                  InstrStage<1, [A9_MUX0], 0>,
    345                                  InstrStage<2, [A9_AGU], 1>,
    346                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
     347   // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
    348   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    349                                  InstrStage<1, [A9_MUX0], 0>,
    350                                  InstrStage<2, [A9_AGU], 1>,
    351                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
    352   //
    353   // Register offset
    354   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    355                                  InstrStage<1, [A9_MUX0], 0>,
    356                                  InstrStage<1, [A9_AGU], 0>,
    357                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
    358   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    359                                  InstrStage<1, [A9_MUX0], 0>,
    360                                  InstrStage<2, [A9_AGU], 1>,
    361                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
    362   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    363                                  InstrStage<1, [A9_MUX0], 0>,
    364                                  InstrStage<2, [A9_AGU], 1>,
    365                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
    366   //
    367   // Scaled register offset
    368   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    369                                   InstrStage<1, [A9_MUX0], 0>,
    370                                   InstrStage<1, [A9_AGU], 0>,
    371                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
    372   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    373                                   InstrStage<1, [A9_MUX0], 0>,
    374                                   InstrStage<2, [A9_AGU], 1>,
    375                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
    376   //
    377   // Immediate offset with update
    378   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    379                                   InstrStage<1, [A9_MUX0], 0>,
    380                                   InstrStage<1, [A9_AGU], 0>,
    381                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
    382   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    383                                   InstrStage<1, [A9_MUX0], 0>,
    384                                   InstrStage<2, [A9_AGU], 1>,
    385                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
    386   //
    387   // Register offset with update
    388   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    389                                   InstrStage<1, [A9_MUX0], 0>,
    390                                   InstrStage<1, [A9_AGU], 0>,
    391                                   InstrStage<1, [A9_LSUnit]>],
    392                                  [2, 1, 1, 1]>,
    393   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    394                                   InstrStage<1, [A9_MUX0], 0>,
    395                                   InstrStage<2, [A9_AGU], 1>,
    396                                   InstrStage<1, [A9_LSUnit]>],
    397                                  [3, 1, 1, 1]>,
    398   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    399                                   InstrStage<1, [A9_MUX0], 0>,
    400                                   InstrStage<2, [A9_AGU], 1>,
    401                                   InstrStage<1, [A9_LSUnit]>],
    402                                  [3, 1, 1, 1]>,
    403   //
    404   // Scaled register offset with update
    405   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    406                                     InstrStage<1, [A9_MUX0], 0>,
    407                                     InstrStage<1, [A9_AGU], 0>,
    408                                     InstrStage<1, [A9_LSUnit]>],
    409                                    [2, 1, 1, 1]>,
    410   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    411                                     InstrStage<1, [A9_MUX0], 0>,
    412                                     InstrStage<2, [A9_AGU], 1>,
    413                                     InstrStage<1, [A9_LSUnit]>],
    414                                    [3, 1, 1, 1]>,
    415   //
    416   // Store multiple
    417   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    418                                 InstrStage<1, [A9_MUX0], 0>,
    419                                 InstrStage<1, [A9_AGU], 0>,
    420                                 InstrStage<2, [A9_LSUnit]>],
    421                 [], [], -1>, // dynamic uops
    422   //
    423   // Store multiple + update
    424   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    425                                 InstrStage<1, [A9_MUX0], 0>,
    426                                 InstrStage<1, [A9_AGU], 0>,
    427                                 InstrStage<2, [A9_LSUnit]>],
    428                 [2], [], -1>, // dynamic uops
    429   //
    430   // Preload
    431   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
    432 
    433   // Branch
    434   //
    435   // no delay slots, so the latency of a branch is unimportant
    436   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
    437                                 InstrStage<1, [A9_Issue1], 0>,
    438                                 InstrStage<1, [A9_Branch]>]>,
    439 
     440   // VFP and NEON share the same register file. This means that every VFP
     441   // instruction should wait for full completion of the preceding NEON
     442   // instruction and vice-versa. We model this behavior with two artificial FUs:
     443   // DRegsVFP and DRegsN.
    444   //
    445   // Every VFP instruction:
    446   //  - Acquires DRegsVFP resource for 1 cycle
    447   //  - Reserves DRegsN resource for the whole duration (including time to
    448   //    register file writeback!).
    449   // Every NEON instruction does the same but with FUs swapped.
    450   //
     451   // Since a reserved FU cannot be acquired, this precisely models
     452   // "cross-domain" stalls.
    453 
    454   // VFP
    455   // Issue through integer pipeline, and execute in NEON unit.
    456 
    457   // FP Special Register to Integer Register File Move
    458   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    459                               InstrStage<1, [A9_MUX0], 0>,
    460                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
    461                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
    462                               InstrStage<1, [A9_NPipe]>],
    463                              [1]>,
    464   //
    465   // Single-precision FP Unary
    466   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    467                                InstrStage<1, [A9_MUX0], 0>,
    468                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
     469                                // Extra latency cycles since writeback is 2 cycles
    470                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
    471                                InstrStage<1, [A9_NPipe]>],
    472                               [1, 1]>,
    473   //
    474   // Double-precision FP Unary
    475   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    476                                InstrStage<1, [A9_MUX0], 0>,
    477                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
     478                                // Extra latency cycles since writeback is 2 cycles
    479                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
    480                                InstrStage<1, [A9_NPipe]>],
    481                               [1, 1]>,
    482 
    483   //
    484   // Single-precision FP Compare
    485   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    486                                InstrStage<1, [A9_MUX0], 0>,
    487                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
     488                                // Extra latency cycles since writeback is 4 cycles
    489                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    490                                InstrStage<1, [A9_NPipe]>],
    491                               [1, 1]>,
    492   //
    493   // Double-precision FP Compare
    494   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    495                                InstrStage<1, [A9_MUX0], 0>,
    496                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
     497                                // Extra latency cycles since writeback is 4 cycles
    498                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    499                                InstrStage<1, [A9_NPipe]>],
    500                               [1, 1]>,
    501   //
    502   // Single to Double FP Convert
    503   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    504                                InstrStage<1, [A9_MUX0], 0>,
    505                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    506                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    507                                InstrStage<1, [A9_NPipe]>],
    508                               [4, 1]>,
    509   // Double to Single FP Convert
    510   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1])
    511   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    512                                InstrStage<1, [A9_MUX0], 0>,
    513                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    514                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    515                                InstrStage<1, [A9_NPipe]>],
    516                               [4, 1]>,
    517 
    518   // Single to Half FP Convert
    519   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1])
    520   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    521                                InstrStage<1, [A9_MUX0], 0>,
    522                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    523                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    524                                InstrStage<1, [A9_NPipe]>],
    525                               [4, 1]>,
    526   // Half to Single FP Convert
    527   // (shorter than Single to Half: NEON side Reserved 3 cycles, operand cycles [2, 1])
    528   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    529                                InstrStage<1, [A9_MUX0], 0>,
    530                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    531                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
    532                                InstrStage<1, [A9_NPipe]>],
    533                               [2, 1]>,
    534 
    535   // Single-Precision FP to Integer Convert
    536   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1])
    537   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    538                                InstrStage<1, [A9_MUX0], 0>,
    539                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    540                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    541                                InstrStage<1, [A9_NPipe]>],
    542                               [4, 1]>,
    543   // Double-Precision FP to Integer Convert
    544   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1])
    545   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    546                                InstrStage<1, [A9_MUX0], 0>,
    547                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    548                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    549                                InstrStage<1, [A9_NPipe]>],
    550                               [4, 1]>,
    551   // Integer to Single-Precision FP Convert
    552   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1])
    553   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    554                                InstrStage<1, [A9_MUX0], 0>,
    555                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    556                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    557                                InstrStage<1, [A9_NPipe]>],
    558                               [4, 1]>,
    559   // Integer to Double-Precision FP Convert
    560   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1])
    561   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    562                                InstrStage<1, [A9_MUX0], 0>,
    563                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    564                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    565                                InstrStage<1, [A9_NPipe]>],
    566                               [4, 1]>,
    567   // Single-precision FP ALU
    568   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1, 1])
    569   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    570                                InstrStage<1, [A9_MUX0], 0>,
    571                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    572                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    573                                InstrStage<1, [A9_NPipe]>],
    574                               [4, 1, 1]>,
    575   // Double-precision FP ALU
    576   // (VFP side Required; NEON side Reserved 5 cycles; operand cycles [4, 1, 1])
    577   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    578                                InstrStage<1, [A9_MUX0], 0>,
    579                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    580                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
    581                                InstrStage<1, [A9_NPipe]>],
    582                               [4, 1, 1]>,
    583   // Single-precision FP Multiply
    584   // (VFP side Required; NEON side Reserved 6 cycles; NPipe 1 cycle)
    585   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    586                                InstrStage<1, [A9_MUX0], 0>,
    587                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    588                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
    589                                InstrStage<1, [A9_NPipe]>],
    590                               [5, 1, 1]>,
    591   // Double-precision FP Multiply
    592   // (VFP side Required; NEON side Reserved 7 cycles; NPipe 2 cycles)
    593   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    594                                InstrStage<1, [A9_MUX0], 0>,
    595                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    596                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
    597                                InstrStage<2, [A9_NPipe]>],
    598                               [6, 1, 1]>,
    599   // Single-precision FP MAC
    600   // (VFP side Required; NEON side Reserved 9 cycles; NPipe 1 cycle)
    601   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    602                                InstrStage<1, [A9_MUX0], 0>,
    603                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    604                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
    605                                InstrStage<1, [A9_NPipe]>],
    606                               [8, 1, 1, 1]>,
    607   // Double-precision FP MAC
    608   // (VFP side Required; NEON side Reserved 10 cycles; NPipe 2 cycles)
    609   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
    610                                InstrStage<1,  [A9_MUX0], 0>,
    611                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
    612                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
    613                                InstrStage<2,  [A9_NPipe]>],
    614                               [9, 1, 1, 1]>,
    615   // Single-precision Fused FP MAC
    616   // (same stages and operand cycles as the non-fused IIC_fpMAC32 entry)
    617   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    618                                InstrStage<1, [A9_MUX0], 0>,
    619                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    620                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
    621                                InstrStage<1, [A9_NPipe]>],
    622                               [8, 1, 1, 1]>,
    623   // Double-precision Fused FP MAC
    624   // (same stages and operand cycles as the non-fused IIC_fpMAC64 entry)
    625   InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
    626                                InstrStage<1,  [A9_MUX0], 0>,
    627                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
    628                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
    629                                InstrStage<2,  [A9_NPipe]>],
    630                               [9, 1, 1, 1]>,
    631   // Single-precision FP DIV
    632   // (VFP side Required; NEON side Reserved 16 cycles; NPipe held 10 cycles)
    633   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
    634                                InstrStage<1,  [A9_MUX0], 0>,
    635                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
    636                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
    637                                InstrStage<10, [A9_NPipe]>],
    638                               [15, 1, 1]>,
    639   // Double-precision FP DIV
    640   // (VFP side Required; NEON side Reserved 26 cycles; NPipe held 20 cycles)
    641   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
    642                                InstrStage<1,  [A9_MUX0], 0>,
    643                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
    644                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
    645                                InstrStage<20, [A9_NPipe]>],
    646                               [25, 1, 1]>,
    647   // Single-precision FP SQRT
    648   // (VFP side Required; NEON side Reserved 18 cycles; NPipe held 13 cycles)
    649   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
    650                                InstrStage<1,  [A9_MUX0], 0>,
    651                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
    652                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
    653                                InstrStage<13, [A9_NPipe]>],
    654                               [17, 1]>,
    655   // Double-precision FP SQRT
    656   // (VFP side Required; NEON side Reserved 33 cycles; NPipe held 28 cycles)
    657   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
    658                                InstrStage<1,  [A9_MUX0], 0>,
    659                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
    660                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
    661                                InstrStage<28, [A9_NPipe]>],
    662                               [32, 1]>,
    663 
    664   // Integer to Single-precision Move
    665   // (VFP side Required; NEON side Reserved 3 cycles; operand cycles [1, 1])
    666   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    667                                InstrStage<1, [A9_MUX0], 0>,
    668                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    669                                // Extra 1 latency cycle since writeback (wbck) is 2 cycles
    670                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
    671                                InstrStage<1, [A9_NPipe]>],
    672                               [1, 1]>,
    673   // Integer to Double-precision Move
    674   // (VFP side Required; NEON side Reserved 3 cycles; operand cycles [1, 1, 1])
    675   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    676                                InstrStage<1, [A9_MUX0], 0>,
    677                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    678                                // Extra 1 latency cycle since writeback (wbck) is 2 cycles
    679                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
    680                                InstrStage<1, [A9_NPipe]>],
    681                               [1, 1, 1]>,
    682   // Single-precision to Integer Move
    683   // Only the issue and A9_MUX0 stages are listed; no register-set or NPipe stage.
    684   //
    685   // On A9 move-from-VFP is free to issue with no stall if other VFP
    686   // operations are in flight. I assume it still can't dual-issue though.
    687   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    688                                InstrStage<1, [A9_MUX0], 0>],
    689                               [2, 1]>,
    690   // Double-precision to Integer Move
    691   // Only the issue and A9_MUX0 stages are listed; no register-set or NPipe stage.
    692   //
    693   // On A9 move-from-VFP is free to issue with no stall if other VFP
    694   // operations are in flight. I assume it still can't dual-issue though.
    695   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    696                                InstrStage<1, [A9_MUX0], 0>],
    697                               [2, 1, 1]>,
    698   // Single-precision FP Load
    699   // (VFP side Required; NEON side Reserved 2 cycles; 1 NPipe and 1 LSUnit cycle)
    700   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    701                                InstrStage<1, [A9_MUX0], 0>,
    702                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    703                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    704                                InstrStage<1, [A9_NPipe], 0>,
    705                                InstrStage<1, [A9_LSUnit]>],
    706                               [1, 1]>,
    707   // Double-precision FP Load
    708   // (same stages as the single-precision load; first operand cycle is 2)
    709   // FIXME: Result latency is 1 if the address is 64-bit aligned.
    710   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    711                                InstrStage<1, [A9_MUX0], 0>,
    712                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    713                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    714                                InstrStage<1, [A9_NPipe], 0>,
    715                                InstrStage<1, [A9_LSUnit]>],
    716                               [2, 1]>,
    717   // FP Load Multiple (LSUnit held 2 cycles; uop count given as -1, i.e. dynamic)
    718   // FP Load Multiple
    719   // FIXME: assumes 2 doubles, which require 2 LS cycles.
    720   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    721                                InstrStage<1, [A9_MUX0], 0>,
    722                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    723                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    724                                InstrStage<1, [A9_NPipe], 0>,
    725                                InstrStage<2, [A9_LSUnit]>],
    726                 [1, 1, 1, 1], [], -1>, // dynamic uops
    727   // (LSUnit held 2 cycles; first operand cycle is 2; uop count -1, i.e. dynamic)
    728   // FP Load Multiple + update
    729   // FIXME: assumes 2 doubles, which require 2 LS cycles.
    730   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    731                                InstrStage<1, [A9_MUX0], 0>,
    732                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    733                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    734                                InstrStage<1, [A9_NPipe], 0>,
    735                                InstrStage<2, [A9_LSUnit]>],
    736                 [2, 1, 1, 1], [], -1>, // dynamic uops
    737   // Single-precision FP Store
    738   // (VFP side Required; NEON side Reserved 2 cycles; 1 NPipe and 1 LSUnit cycle)
    739   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    740                                InstrStage<1, [A9_MUX0], 0>,
    741                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    742                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    743                                InstrStage<1, [A9_NPipe], 0>,
    744                                InstrStage<1, [A9_LSUnit]>],
    745                               [1, 1]>,
    746   // Double-precision FP Store
    747   // (same stages and operand cycles as the single-precision store)
    748   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    749                                InstrStage<1, [A9_MUX0], 0>,
    750                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    751                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    752                                InstrStage<1, [A9_NPipe], 0>,
    753                                InstrStage<1, [A9_LSUnit]>],
    754                               [1, 1]>,
    755   // (LSUnit held 2 cycles; uop count given as -1, i.e. dynamic)
    756   // FP Store Multiple
    757   // FIXME: assumes 2 doubles, which require 2 LS cycles.
    758   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    759                                InstrStage<1, [A9_MUX0], 0>,
    760                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
    761                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
    762                                InstrStage<1, [A9_NPipe], 0>,
    763                                InstrStage<2, [A9_LSUnit]>],
    764                 [1, 1, 1, 1], [], -1>, // dynamic uops
    765   // (LSUnit held 2 cycles; first operand cycle is 2; uop count -1, i.e. dynamic)
    766   // FP Store Multiple + update
    767   // FIXME: assumes 2 doubles, which require 2 LS cycles.
    768   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    769                                 InstrStage<1, [A9_MUX0], 0>,
    770                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
    771                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
    772                                 InstrStage<1, [A9_NPipe], 0>,
    773                                 InstrStage<2, [A9_LSUnit]>],
    774                 [2, 1, 1, 1], [], -1>, // dynamic uops
    775   // NEON (entries from here Require A9_DRegsN and mark A9_DRegsVFP Reserved)
    776   // VLD1 (VFP side Reserved 7 cycles; 1 NPipe and 1 LSUnit cycle)
    777   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    778                                InstrStage<1, [A9_MUX0], 0>,
    779                                InstrStage<1, [A9_DRegsN],   0, Required>,
    780                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    781                                InstrStage<1, [A9_NPipe], 0>,
    782                                InstrStage<1, [A9_LSUnit]>],
    783                               [1, 1]>,
    784   // VLD1x2 (NEON side Required; VFP side Reserved 7 cycles; 1 NPipe / 1 LSUnit cycle)
    785   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    786                                InstrStage<1, [A9_MUX0], 0>,
    787                                InstrStage<1, [A9_DRegsN],   0, Required>,
    788                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    789                                InstrStage<1, [A9_NPipe], 0>,
    790                                InstrStage<1, [A9_LSUnit]>],
    791                               [1, 1, 1]>,
    792   // VLD1x3 (NEON side Required; VFP side Reserved 8 cycles; 2 NPipe / 2 LSUnit cycles)
    793   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    794                                InstrStage<1, [A9_MUX0], 0>,
    795                                InstrStage<1, [A9_DRegsN],   0, Required>,
    796                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    797                                InstrStage<2, [A9_NPipe], 0>,
    798                                InstrStage<2, [A9_LSUnit]>],
    799                               [1, 1, 2, 1]>,
    800   // VLD1x4 (NEON side Required; VFP side Reserved 8 cycles; 2 NPipe / 2 LSUnit cycles)
    801   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    802                                InstrStage<1, [A9_MUX0], 0>,
    803                                InstrStage<1, [A9_DRegsN],   0, Required>,
    804                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    805                                InstrStage<2, [A9_NPipe], 0>,
    806                                InstrStage<2, [A9_LSUnit]>],
    807                               [1, 1, 2, 2, 1]>,
    808   // VLD1u (same stages as VLD1; operand cycles [1, 2, 1])
    809   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    810                                InstrStage<1, [A9_MUX0], 0>,
    811                                InstrStage<1, [A9_DRegsN],   0, Required>,
    812                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    813                                InstrStage<1, [A9_NPipe], 0>,
    814                                InstrStage<1, [A9_LSUnit]>],
    815                               [1, 2, 1]>,
    816   // VLD1x2u (same stages as VLD1x2; operand cycles [1, 1, 2, 1])
    817   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    818                                InstrStage<1, [A9_MUX0], 0>,
    819                                InstrStage<1, [A9_DRegsN],   0, Required>,
    820                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    821                                InstrStage<1, [A9_NPipe], 0>,
    822                                InstrStage<1, [A9_LSUnit]>],
    823                               [1, 1, 2, 1]>,
    824   // VLD1x3u (same stages as VLD1x3; operand cycles [1, 1, 2, 2, 1])
    825   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    826                                InstrStage<1, [A9_MUX0], 0>,
    827                                InstrStage<1, [A9_DRegsN],   0, Required>,
    828                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    829                                InstrStage<2, [A9_NPipe], 0>,
    830                                InstrStage<2, [A9_LSUnit]>],
    831                               [1, 1, 2, 2, 1]>,
    832   // VLD1x4u (same stages as VLD1x4; operand cycles [1, 1, 2, 2, 2, 1])
    833   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    834                                InstrStage<1, [A9_MUX0], 0>,
    835                                InstrStage<1, [A9_DRegsN],   0, Required>,
    836                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    837                                InstrStage<2, [A9_NPipe], 0>,
    838                                InstrStage<2, [A9_LSUnit]>],
    839                               [1, 1, 2, 2, 2, 1]>,
    840   // VLD1ln
    841   // (NEON side Required; VFP side Reserved 8 cycles; 2 NPipe / 2 LSUnit cycles)
    842   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    843                                InstrStage<1, [A9_MUX0], 0>,
    844                                InstrStage<1, [A9_DRegsN],   0, Required>,
    845                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    846                                InstrStage<2, [A9_NPipe], 0>,
    847                                InstrStage<2, [A9_LSUnit]>],
    848                               [3, 1, 1, 1]>,
    849   // VLD1lnu
    850   // (same stages as VLD1ln; operand cycles [3, 2, 1, 1, 1, 1])
    851   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    852                                InstrStage<1, [A9_MUX0], 0>,
    853                                InstrStage<1, [A9_DRegsN],   0, Required>,
    854                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    855                                InstrStage<2, [A9_NPipe], 0>,
    856                                InstrStage<2, [A9_LSUnit]>],
    857                               [3, 2, 1, 1, 1, 1]>,
    858   // VLD1dup
    859   // (NEON side Required; VFP side Reserved 7 cycles; 1 NPipe / 1 LSUnit cycle)
    860   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    861                                InstrStage<1, [A9_MUX0], 0>,
    862                                InstrStage<1, [A9_DRegsN],   0, Required>,
    863                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    864                                InstrStage<1, [A9_NPipe], 0>,
    865                                InstrStage<1, [A9_LSUnit]>],
    866                               [2, 1]>,
    867   // VLD1dupu
    868   // (same stages as VLD1dup; operand cycles [2, 2, 1, 1])
    869   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    870                                InstrStage<1, [A9_MUX0], 0>,
    871                                InstrStage<1, [A9_DRegsN],   0, Required>,
    872                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    873                                InstrStage<1, [A9_NPipe], 0>,
    874                                InstrStage<1, [A9_LSUnit]>],
    875                               [2, 2, 1, 1]>,
    876   // VLD2 (NEON side Required; 1 NPipe / 1 LSUnit cycle)
    877   // VLD2
    878   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    879                                InstrStage<1, [A9_MUX0], 0>,
    880                                InstrStage<1, [A9_DRegsN],   0, Required>,
    881                                // Extra latency cycles since writeback (wbck) is 7 cycles
    882                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    883                                InstrStage<1, [A9_NPipe], 0>,
    884                                InstrStage<1, [A9_LSUnit]>],
    885                               [2, 2, 1]>,
    886   // VLD2x2
    887   // (NEON side Required; VFP side Reserved 8 cycles; 2 NPipe / 2 LSUnit cycles)
    888   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    889                                InstrStage<1, [A9_MUX0], 0>,
    890                                InstrStage<1, [A9_DRegsN],   0, Required>,
    891                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    892                                InstrStage<2, [A9_NPipe], 0>,
    893                                InstrStage<2, [A9_LSUnit]>],
    894                               [2, 3, 2, 3, 1]>,
    895   // VLD2ln
    896   // (NEON side Required; VFP side Reserved 8 cycles; 2 NPipe / 2 LSUnit cycles)
    897   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    898                                InstrStage<1, [A9_MUX0], 0>,
    899                                InstrStage<1, [A9_DRegsN],   0, Required>,
    900                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    901                                InstrStage<2, [A9_NPipe], 0>,
    902                                InstrStage<2, [A9_LSUnit]>],
    903                               [3, 3, 1, 1, 1, 1]>,
    904   // VLD2u (NEON side Required; 1 NPipe / 1 LSUnit cycle)
    905   // VLD2u
    906   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    907                                InstrStage<1, [A9_MUX0], 0>,
    908                                InstrStage<1, [A9_DRegsN],   0, Required>,
    909                                // Extra latency cycles since writeback (wbck) is 7 cycles
    910                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    911                                InstrStage<1, [A9_NPipe], 0>,
    912                                InstrStage<1, [A9_LSUnit]>],
    913                               [2, 2, 2, 1, 1, 1]>,
    914   // VLD2x2u
    915   // (same stages as VLD2x2; operand cycles [2, 3, 2, 3, 2, 1])
    916   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    917                                InstrStage<1, [A9_MUX0], 0>,
    918                                InstrStage<1, [A9_DRegsN],   0, Required>,
    919                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    920                                InstrStage<2, [A9_NPipe], 0>,
    921                                InstrStage<2, [A9_LSUnit]>],
    922                               [2, 3, 2, 3, 2, 1]>,
    923   // VLD2lnu
    924   // (same stages as VLD2ln; operand cycles [3, 3, 2, 1, 1, 1, 1, 1])
    925   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    926                                InstrStage<1, [A9_MUX0], 0>,
    927                                InstrStage<1, [A9_DRegsN],   0, Required>,
    928                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
    929                                InstrStage<2, [A9_NPipe], 0>,
    930                                InstrStage<2, [A9_LSUnit]>],
    931                               [3, 3, 2, 1, 1, 1, 1, 1]>,
    932   // VLD2dup
    933   // (NEON side Required; VFP side Reserved 7 cycles; 1 NPipe / 1 LSUnit cycle)
    934   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    935                                InstrStage<1, [A9_MUX0], 0>,
    936                                InstrStage<1, [A9_DRegsN],   0, Required>,
    937                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    938                                InstrStage<1, [A9_NPipe], 0>,
    939                                InstrStage<1, [A9_LSUnit]>],
    940                               [2, 2, 1]>,
    941   // VLD2dupu
    942   // (same stages as VLD2dup; operand cycles [2, 2, 2, 1, 1])
    943   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    944                                InstrStage<1, [A9_MUX0], 0>,
    945                                InstrStage<1, [A9_DRegsN],   0, Required>,
    946                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
    947                                InstrStage<1, [A9_NPipe], 0>,
    948                                InstrStage<1, [A9_LSUnit]>],
    949                               [2, 2, 2, 1, 1]>,
    950   // VLD3
    951   // (NEON side Required; VFP side Reserved 9 cycles; 3 NPipe / 3 LSUnit cycles)
    952   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    953                                InstrStage<1, [A9_MUX0], 0>,
    954                                InstrStage<1, [A9_DRegsN],   0, Required>,
    955                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
    956                                InstrStage<3, [A9_NPipe], 0>,
    957                                InstrStage<3, [A9_LSUnit]>],
    958                               [3, 3, 4, 1]>,
    959   // VLD3ln (VFP side Reserved 11 cycles; 5 NPipe / 5 LSUnit cycles)
    960   // NOTE(review): 5 NPipe/LSUnit cycles here vs. 4 for VLD4ln; confirm against the TRM.
    961   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    962                                InstrStage<1, [A9_MUX0], 0>,
    963                                InstrStage<1, [A9_DRegsN],   0, Required>,
    964                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
    965                                InstrStage<5, [A9_NPipe], 0>,
    966                                InstrStage<5, [A9_LSUnit]>],
    967                               [5, 5, 6, 1, 1, 1, 1, 2]>,
    968   // VLD3u
    969   // (same stages as VLD3; operand cycles [3, 3, 4, 2, 1])
    970   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    971                                InstrStage<1, [A9_MUX0], 0>,
    972                                InstrStage<1, [A9_DRegsN],   0, Required>,
    973                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
    974                                InstrStage<3, [A9_NPipe], 0>,
    975                                InstrStage<3, [A9_LSUnit]>],
    976                               [3, 3, 4, 2, 1]>,
    977   // VLD3lnu (VFP side Reserved 11 cycles; 5 NPipe / 5 LSUnit cycles)
    978   // NOTE(review): 5 NPipe/LSUnit cycles here vs. 4 for VLD4lnu; confirm against the TRM.
    979   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    980                                InstrStage<1, [A9_MUX0], 0>,
    981                                InstrStage<1, [A9_DRegsN],   0, Required>,
    982                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
    983                                InstrStage<5, [A9_NPipe], 0>,
    984                                InstrStage<5, [A9_LSUnit]>],
    985                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
    986   // VLD3dup
    987   // (NEON side Required; VFP side Reserved 9 cycles; 3 NPipe / 3 LSUnit cycles)
    988   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    989                                InstrStage<1, [A9_MUX0], 0>,
    990                                InstrStage<1, [A9_DRegsN],   0, Required>,
    991                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
    992                                InstrStage<3, [A9_NPipe], 0>,
    993                                InstrStage<3, [A9_LSUnit]>],
    994                               [3, 3, 4, 1]>,
    995   // VLD3dupu
    996   // (same stages as VLD3dup; operand cycles [3, 3, 4, 2, 1, 1])
    997   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
    998                                InstrStage<1, [A9_MUX0], 0>,
    999                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1000                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
   1001                                InstrStage<3, [A9_NPipe], 0>,
   1002                                InstrStage<3, [A9_LSUnit]>],
   1003                               [3, 3, 4, 2, 1, 1]>,
   1004   // VLD4
   1005   // (NEON side Required; VFP side Reserved 9 cycles; 3 NPipe / 3 LSUnit cycles)
   1006   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1007                                InstrStage<1, [A9_MUX0], 0>,
   1008                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1009                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
   1010                                InstrStage<3, [A9_NPipe], 0>,
   1011                                InstrStage<3, [A9_LSUnit]>],
   1012                               [3, 3, 4, 4, 1]>,
   1013   // VLD4ln
   1014   // (NEON side Required; VFP side Reserved 10 cycles; 4 NPipe / 4 LSUnit cycles)
   1015   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1016                                InstrStage<1, [A9_MUX0], 0>,
   1017                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1018                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
   1019                                InstrStage<4, [A9_NPipe], 0>,
   1020                                InstrStage<4, [A9_LSUnit]>],
   1021                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
   1022   // VLD4u
   1023   // (same stages as VLD4; operand cycles [3, 3, 4, 4, 2, 1])
   1024   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1025                                InstrStage<1, [A9_MUX0], 0>,
   1026                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1027                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
   1028                                InstrStage<3, [A9_NPipe], 0>,
   1029                                InstrStage<3, [A9_LSUnit]>],
   1030                               [3, 3, 4, 4, 2, 1]>,
   1031   // VLD4lnu
   1032   // (same stages as VLD4ln; operand cycles [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2])
   1033   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1034                                InstrStage<1, [A9_MUX0], 0>,
   1035                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1036                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
   1037                                InstrStage<4, [A9_NPipe], 0>,
   1038                                InstrStage<4, [A9_LSUnit]>],
   1039                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
   1040   // VLD4dup
   1041   // (NEON side Required; VFP side Reserved 8 cycles; 2 NPipe / 2 LSUnit cycles)
   1042   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1043                                InstrStage<1, [A9_MUX0], 0>,
   1044                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1045                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1046                                InstrStage<2, [A9_NPipe], 0>,
   1047                                InstrStage<2, [A9_LSUnit]>],
   1048                               [2, 2, 3, 3, 1]>,
   1049   //
   1050   // VLD4dupu
   1051   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1052                                InstrStage<1, [A9_MUX0], 0>,
   1053                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1054                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1055                                InstrStage<2, [A9_NPipe], 0>,
   1056                                InstrStage<2, [A9_LSUnit]>],
   1057                               [2, 2, 3, 3, 2, 1, 1]>,
   1058   //
   1059   // VST1
   1060   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1061                                InstrStage<1, [A9_MUX0], 0>,
   1062                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1063                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1064                                InstrStage<1, [A9_NPipe], 0>,
   1065                                InstrStage<1, [A9_LSUnit]>],
   1066                               [1, 1, 1]>,
   1067   //
   1068   // VST1x2
   1069   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1070                                InstrStage<1, [A9_MUX0], 0>,
   1071                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1072                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1073                                InstrStage<1, [A9_NPipe], 0>,
   1074                                InstrStage<1, [A9_LSUnit]>],
   1075                               [1, 1, 1, 1]>,
   1076   //
   1077   // VST1x3
   1078   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1079                                InstrStage<1, [A9_MUX0], 0>,
   1080                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1081                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1082                                InstrStage<2, [A9_NPipe], 0>,
   1083                                InstrStage<2, [A9_LSUnit]>],
   1084                               [1, 1, 1, 1, 2]>,
   1085   //
   1086   // VST1x4
   1087   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1088                                InstrStage<1, [A9_MUX0], 0>,
   1089                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1090                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1091                                InstrStage<2, [A9_NPipe], 0>,
   1092                                InstrStage<2, [A9_LSUnit]>],
   1093                               [1, 1, 1, 1, 2, 2]>,
   1094   //
   1095   // VST1u
   1096   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1097                                InstrStage<1, [A9_MUX0], 0>,
   1098                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1099                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1100                                InstrStage<1, [A9_NPipe], 0>,
   1101                                InstrStage<1, [A9_LSUnit]>],
   1102                               [2, 1, 1, 1, 1]>,
   1103   //
   1104   // VST1x2u
   1105   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1106                                InstrStage<1, [A9_MUX0], 0>,
   1107                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1108                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1109                                InstrStage<1, [A9_NPipe], 0>,
   1110                                InstrStage<1, [A9_LSUnit]>],
   1111                               [2, 1, 1, 1, 1, 1]>,
   1112   //
   1113   // VST1x3u
   1114   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1115                                InstrStage<1, [A9_MUX0], 0>,
   1116                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1117                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1118                                InstrStage<2, [A9_NPipe], 0>,
   1119                                InstrStage<2, [A9_LSUnit]>],
   1120                               [2, 1, 1, 1, 1, 1, 2]>,
   1121   //
   1122   // VST1x4u
   1123   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1124                                InstrStage<1, [A9_MUX0], 0>,
   1125                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1126                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1127                                InstrStage<2, [A9_NPipe], 0>,
   1128                                InstrStage<2, [A9_LSUnit]>],
   1129                               [2, 1, 1, 1, 1, 1, 2, 2]>,
   1130   //
   1131   // VST1ln
   1132   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1133                                InstrStage<1, [A9_MUX0], 0>,
   1134                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1135                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1136                                InstrStage<1, [A9_NPipe], 0>,
   1137                                InstrStage<1, [A9_LSUnit]>],
   1138                               [1, 1, 1]>,
   1139   //
   1140   // VST1lnu
   1141   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1142                                InstrStage<1, [A9_MUX0], 0>,
   1143                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1144                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1145                                InstrStage<1, [A9_NPipe], 0>,
   1146                                InstrStage<1, [A9_LSUnit]>],
   1147                               [2, 1, 1, 1, 1]>,
   1148   //
   1149   // VST2
   1150   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1151                                InstrStage<1, [A9_MUX0], 0>,
   1152                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1153                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1154                                InstrStage<1, [A9_NPipe], 0>,
   1155                                InstrStage<1, [A9_LSUnit]>],
   1156                               [1, 1, 1, 1]>,
   1157   //
   1158   // VST2x2
   1159   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1160                                InstrStage<1, [A9_MUX0], 0>,
   1161                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1162                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1163                                InstrStage<3, [A9_NPipe], 0>,
   1164                                InstrStage<3, [A9_LSUnit]>],
   1165                               [1, 1, 1, 1, 2, 2]>,
   1166   //
   1167   // VST2u
   1168   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1169                                InstrStage<1, [A9_MUX0], 0>,
   1170                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1171                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1172                                InstrStage<1, [A9_NPipe], 0>,
   1173                                InstrStage<1, [A9_LSUnit]>],
   1174                               [2, 1, 1, 1, 1, 1]>,
   1175   //
   1176   // VST2x2u
   1177   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1178                                InstrStage<1, [A9_MUX0], 0>,
   1179                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1180                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1181                                InstrStage<3, [A9_NPipe], 0>,
   1182                                InstrStage<3, [A9_LSUnit]>],
   1183                               [2, 1, 1, 1, 1, 1, 2, 2]>,
   1184   //
   1185   // VST2ln
   1186   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1187                                InstrStage<1, [A9_MUX0], 0>,
   1188                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1189                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1190                                InstrStage<1, [A9_NPipe], 0>,
   1191                                InstrStage<1, [A9_LSUnit]>],
   1192                               [1, 1, 1, 1]>,
   1193   //
   1194   // VST2lnu
   1195   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1196                                InstrStage<1, [A9_MUX0], 0>,
   1197                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1198                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1199                                InstrStage<1, [A9_NPipe], 0>,
   1200                                InstrStage<1, [A9_LSUnit]>],
   1201                               [2, 1, 1, 1, 1, 1]>,
   1202   //
   1203   // VST3
   1204   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1205                                InstrStage<1, [A9_MUX0], 0>,
   1206                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1207                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1208                                InstrStage<2, [A9_NPipe], 0>,
   1209                                InstrStage<2, [A9_LSUnit]>],
   1210                               [1, 1, 1, 1, 2]>,
   1211   //
   1212   // VST3u
   1213   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1214                                InstrStage<1, [A9_MUX0], 0>,
   1215                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1216                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1217                                InstrStage<2, [A9_NPipe], 0>,
   1218                                InstrStage<2, [A9_LSUnit]>],
   1219                               [2, 1, 1, 1, 1, 1, 2]>,
   1220   //
   1221   // VST3ln
   1222   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1223                                InstrStage<1, [A9_MUX0], 0>,
   1224                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1225                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1226                                InstrStage<3, [A9_NPipe], 0>,
   1227                                InstrStage<3, [A9_LSUnit]>],
   1228                               [1, 1, 1, 1, 2]>,
   1229   //
   1230   // VST3lnu
   1231   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1232                                InstrStage<1, [A9_MUX0], 0>,
   1233                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1234                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1235                                InstrStage<3, [A9_NPipe], 0>,
   1236                                InstrStage<3, [A9_LSUnit]>],
   1237                               [2, 1, 1, 1, 1, 1, 2]>,
   1238   //
   1239   // VST4
   1240   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1241                                InstrStage<1, [A9_MUX0], 0>,
   1242                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1243                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1244                                InstrStage<2, [A9_NPipe], 0>,
   1245                                InstrStage<2, [A9_LSUnit]>],
   1246                               [1, 1, 1, 1, 2, 2]>,
   1247   //
   1248   // VST4u
   1249   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1250                                InstrStage<1, [A9_MUX0], 0>,
   1251                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1252                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1253                                InstrStage<2, [A9_NPipe], 0>,
   1254                                InstrStage<2, [A9_LSUnit]>],
   1255                               [2, 1, 1, 1, 1, 1, 2, 2]>,
   1256   //
   1257   // VST4ln
   1258   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1259                                InstrStage<1, [A9_MUX0], 0>,
   1260                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1261                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1262                                InstrStage<2, [A9_NPipe], 0>,
   1263                                InstrStage<2, [A9_LSUnit]>],
   1264                               [1, 1, 1, 1, 2, 2]>,
   1265   //
   1266   // VST4lnu
   1267   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1268                                InstrStage<1, [A9_MUX0], 0>,
   1269                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1270                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
   1271                                InstrStage<2, [A9_NPipe], 0>,
   1272                                InstrStage<2, [A9_LSUnit]>],
   1273                               [2, 1, 1, 1, 1, 1, 2, 2]>,
   1274 
   1275   //
   1276   // Double-register Integer Unary
   1277   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1278                                InstrStage<1, [A9_MUX0], 0>,
   1279                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1280                                // Extra latency cycles since wbck is 6 cycles
   1281                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1282                                InstrStage<1, [A9_NPipe]>],
   1283                               [4, 2]>,
   1284   //
   1285   // Quad-register Integer Unary
   1286   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1287                                InstrStage<1, [A9_MUX0], 0>,
   1288                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1289                                // Extra latency cycles since wbck is 6 cycles
   1290                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1291                                InstrStage<1, [A9_NPipe]>],
   1292                               [4, 2]>,
   1293   //
   1294   // Double-register Integer Q-Unary
   1295   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1296                                InstrStage<1, [A9_MUX0], 0>,
   1297                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1298                                // Extra latency cycles since wbck is 6 cycles
   1299                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1300                                InstrStage<1, [A9_NPipe]>],
   1301                               [4, 1]>,
   1302   //
   1303   // Quad-register Integer Q-Unary
   1304   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1305                                InstrStage<1, [A9_MUX0], 0>,
   1306                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1307                                // Extra latency cycles since wbck is 6 cycles
   1308                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1309                                InstrStage<1, [A9_NPipe]>],
   1310                               [4, 1]>,
   1311   //
   1312   // Double-register Integer Binary
   1313   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1314                                InstrStage<1, [A9_MUX0], 0>,
   1315                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1316                                // Extra latency cycles since wbck is 6 cycles
   1317                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1318                                InstrStage<1, [A9_NPipe]>],
   1319                               [3, 2, 2]>,
   1320   //
   1321   // Quad-register Integer Binary
   1322   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1323                                InstrStage<1, [A9_MUX0], 0>,
   1324                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1325                                // Extra latency cycles since wbck is 6 cycles
   1326                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1327                                InstrStage<1, [A9_NPipe]>],
   1328                               [3, 2, 2]>,
   1329   //
   1330   // Double-register Integer Subtract
   1331   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1332                                InstrStage<1, [A9_MUX0], 0>,
   1333                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1334                                // Extra latency cycles since wbck is 6 cycles
   1335                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1336                                InstrStage<1, [A9_NPipe]>],
   1337                               [3, 2, 1]>,
   1338   //
   1339   // Quad-register Integer Subtract
   1340   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1341                                InstrStage<1, [A9_MUX0], 0>,
   1342                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1343                                // Extra latency cycles since wbck is 6 cycles
   1344                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1345                                InstrStage<1, [A9_NPipe]>],
   1346                               [3, 2, 1]>,
   1347   //
   1348   // Double-register Integer Shift
   1349   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1350                                InstrStage<1, [A9_MUX0], 0>,
   1351                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1352                                // Extra latency cycles since wbck is 6 cycles
   1353                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1354                                InstrStage<1, [A9_NPipe]>],
   1355                               [3, 1, 1]>,
   1356   //
   1357   // Quad-register Integer Shift
   1358   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1359                                InstrStage<1, [A9_MUX0], 0>,
   1360                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1361                                // Extra latency cycles since wbck is 6 cycles
   1362                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1363                                InstrStage<1, [A9_NPipe]>],
   1364                               [3, 1, 1]>,
   1365   //
   1366   // Double-register Integer Shift (4 cycle)
   1367   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1368                                InstrStage<1, [A9_MUX0], 0>,
   1369                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1370                                // Extra latency cycles since wbck is 6 cycles
   1371                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1372                                InstrStage<1, [A9_NPipe]>],
   1373                               [4, 1, 1]>,
   1374   //
   1375   // Quad-register Integer Shift (4 cycle)
   1376   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1377                                InstrStage<1, [A9_MUX0], 0>,
   1378                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1379                                // Extra latency cycles since wbck is 6 cycles
   1380                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1381                                InstrStage<1, [A9_NPipe]>],
   1382                               [4, 1, 1]>,
   1383   //
   1384   // Double-register Integer Binary (4 cycle)
   1385   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1386                                InstrStage<1, [A9_MUX0], 0>,
   1387                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1388                                // Extra latency cycles since wbck is 6 cycles
   1389                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1390                                InstrStage<1, [A9_NPipe]>],
   1391                               [4, 2, 2]>,
   1392   //
   1393   // Quad-register Integer Binary (4 cycle)
   1394   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1395                                InstrStage<1, [A9_MUX0], 0>,
   1396                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1397                                // Extra latency cycles since wbck is 6 cycles
   1398                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1399                                InstrStage<1, [A9_NPipe]>],
   1400                               [4, 2, 2]>,
   1401   //
   1402   // Double-register Integer Subtract (4 cycle)
   1403   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1404                                InstrStage<1, [A9_MUX0], 0>,
   1405                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1406                                // Extra latency cycles since wbck is 6 cycles
   1407                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1408                                InstrStage<1, [A9_NPipe]>],
   1409                               [4, 2, 1]>,
   1410   //
   1411   // Quad-register Integer Subtract (4 cycle)
   1412   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1413                                InstrStage<1, [A9_MUX0], 0>,
   1414                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1415                                // Extra latency cycles since wbck is 6 cycles
   1416                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1417                                InstrStage<1, [A9_NPipe]>],
   1418                               [4, 2, 1]>,
   1419 
   1420   //
   1421   // Double-register Integer Count
   1422   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1423                                InstrStage<1, [A9_MUX0], 0>,
   1424                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1425                                // Extra latency cycles since wbck is 6 cycles
   1426                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1427                                InstrStage<1, [A9_NPipe]>],
   1428                               [3, 2, 2]>,
   1429   //
   1430   // Quad-register Integer Count
   1431   // Result written in N3, but that is relative to the last cycle of a
   1432   // multicycle instruction, so we use 4 for those cases.
   1433   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1434                                InstrStage<1, [A9_MUX0], 0>,
   1435                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1436                                // Extra latency cycles since wbck is 7 cycles
   1437                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1438                                InstrStage<2, [A9_NPipe]>],
   1439                               [4, 2, 2]>,
   1440   //
   1441   // Double-register Absolute Difference and Accumulate
   1442   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1443                                InstrStage<1, [A9_MUX0], 0>,
   1444                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1445                                // Extra latency cycles since wbck is 6 cycles
   1446                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1447                                InstrStage<1, [A9_NPipe]>],
   1448                               [6, 3, 2, 1]>,
   1449   //
   1450   // Quad-register Absolute Difference and Accumulate
   1451   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1452                                InstrStage<1, [A9_MUX0], 0>,
   1453                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1454                                // Extra latency cycles since wbck is 6 cycles
   1455                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1456                                InstrStage<2, [A9_NPipe]>],
   1457                               [6, 3, 2, 1]>,
   1458   //
   1459   // Double-register Integer Pair Add Long
   1460   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1461                                InstrStage<1, [A9_MUX0], 0>,
   1462                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1463                                // Extra latency cycles since wbck is 6 cycles
   1464                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1465                                InstrStage<1, [A9_NPipe]>],
   1466                               [6, 3, 1]>,
   1467   //
   1468   // Quad-register Integer Pair Add Long
   1469   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1470                                InstrStage<1, [A9_MUX0], 0>,
   1471                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1472                                // Extra latency cycles since wbck is 6 cycles
   1473                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1474                                InstrStage<2, [A9_NPipe]>],
   1475                               [6, 3, 1]>,
   1476 
   1477   //
   1478   // Double-register Integer Multiply (.8, .16)
   1479   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1480                                InstrStage<1, [A9_MUX0], 0>,
   1481                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1482                                // Extra latency cycles since wbck is 6 cycles
   1483                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1484                                InstrStage<1, [A9_NPipe]>],
   1485                               [6, 2, 2]>,
   1486   //
   1487   // Quad-register Integer Multiply (.8, .16)
   1488   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1489                                InstrStage<1, [A9_MUX0], 0>,
   1490                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1491                                // Extra latency cycles since wbck is 7 cycles
   1492                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1493                                InstrStage<2, [A9_NPipe]>],
   1494                               [7, 2, 2]>,
   1495 
   1496   //
   1497   // Double-register Integer Multiply (.32)
   1498   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1499                                InstrStage<1, [A9_MUX0], 0>,
   1500                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1501                                // Extra latency cycles since wbck is 7 cycles
   1502                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1503                                InstrStage<2, [A9_NPipe]>],
   1504                               [7, 2, 1]>,
   1505   //
   1506   // Quad-register Integer Multiply (.32)
   1507   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1508                                InstrStage<1, [A9_MUX0], 0>,
   1509                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1510                                // Extra latency cycles since wbck is 9 cycles
   1511                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
   1512                                InstrStage<4, [A9_NPipe]>],
   1513                               [9, 2, 1]>,
   1514   //
   1515   // Double-register Integer Multiply-Accumulate (.8, .16)
   1516   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1517                                InstrStage<1, [A9_MUX0], 0>,
   1518                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1519                                // Extra latency cycles since wbck is 6 cycles
   1520                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1521                                InstrStage<1, [A9_NPipe]>],
   1522                               [6, 3, 2, 2]>,
   1523   //
   1524   // Double-register Integer Multiply-Accumulate (.32)
   1525   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1526                                InstrStage<1, [A9_MUX0], 0>,
   1527                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1528                                // Extra latency cycles since wbck is 7 cycles
   1529                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1530                                InstrStage<2, [A9_NPipe]>],
   1531                               [7, 3, 2, 1]>,
   1532   //
   1533   // Quad-register Integer Multiply-Accumulate (.8, .16)
   1534   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1535                                InstrStage<1, [A9_MUX0], 0>,
   1536                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1537                                // Extra latency cycles since wbck is 7 cycles
   1538                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1539                                InstrStage<2, [A9_NPipe]>],
   1540                               [7, 3, 2, 2]>,
   1541   //
   1542   // Quad-register Integer Multiply-Accumulate (.32)
   1543   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1544                                InstrStage<1, [A9_MUX0], 0>,
   1545                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1546                                // Extra latency cycles since wbck is 9 cycles
   1547                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
   1548                                InstrStage<4, [A9_NPipe]>],
   1549                               [9, 3, 2, 1]>,
   1550 
   1551   //
   1552   // Move
   1553   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1554                                InstrStage<1, [A9_MUX0], 0>,
   1555                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1556                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
   1557                                InstrStage<1, [A9_NPipe]>],
   1558                               [1,1]>,
   1559   //
   1560   // Move Immediate
   1561   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1562                                InstrStage<1, [A9_MUX0], 0>,
   1563                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1564                                // Extra latency cycles since wbck is 6 cycles
   1565                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1566                                InstrStage<1, [A9_NPipe]>],
   1567                               [3]>,
   1568   //
   1569   // Double-register Permute Move
   1570   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1571                                InstrStage<1, [A9_MUX0], 0>,
   1572                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1573                                // Extra latency cycles since wbck is 6 cycles
   1574                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1575                                InstrStage<1, [A9_NPipe]>],
   1576                               [2, 1]>,
   1577   //
   1578   // Quad-register Permute Move
   1579   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1580                                InstrStage<1, [A9_MUX0], 0>,
   1581                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1582                                // Extra latency cycles since wbck is 6 cycles
   1583                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1584                                InstrStage<1, [A9_NPipe]>],
   1585                               [2, 1]>,
   1586   //
   1587   // Integer to Single-precision Move
   1588   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1589                                InstrStage<1, [A9_MUX0], 0>,
   1590                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1591                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1592                                InstrStage<1, [A9_NPipe]>],
   1593                               [1, 1]>,
   1594   //
   1595   // Integer to Double-precision Move
   1596   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1597                                InstrStage<1, [A9_MUX0], 0>,
   1598                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1599                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1600                                InstrStage<1, [A9_NPipe]>],
   1601                               [1, 1, 1]>,
   1602   //
   1603   // Single-precision to Integer Move
   1604   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1605                                InstrStage<1, [A9_MUX0], 0>,
   1606                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1607                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1608                                InstrStage<1, [A9_NPipe]>],
   1609                               [2, 1]>,
   1610   //
   1611   // Double-precision to Integer Move
   1612   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1613                                InstrStage<1, [A9_MUX0], 0>,
   1614                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1615                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
   1616                                InstrStage<1, [A9_NPipe]>],
   1617                               [2, 2, 1]>,
   1618   //
   1619   // Integer to Lane Move
   1620   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1621                                InstrStage<1, [A9_MUX0], 0>,
   1622                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1623                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
   1624                                InstrStage<2, [A9_NPipe]>],
   1625                               [3, 1, 1]>,
   1626 
   1627   //
   1628   // Vector narrow move
   1629   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1630                                InstrStage<1, [A9_MUX0], 0>,
   1631                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1632                                // Extra latency cycles since wbck is 6 cycles
   1633                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1634                                InstrStage<1, [A9_NPipe]>],
   1635                               [3, 1]>,
   1636   //
   1637   // Double-register FP Unary
   1638   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1639                                InstrStage<1, [A9_MUX0], 0>,
   1640                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1641                                // Extra latency cycles since wbck is 6 cycles
   1642                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1643                                InstrStage<1, [A9_NPipe]>],
   1644                               [5, 2]>,
   1645   //
   1646   // Quad-register FP Unary
   1647   // Result written in N5, but that is relative to the last cycle of multicycle,
   1648   // so we use 6 for those cases
   1649   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1650                                InstrStage<1, [A9_MUX0], 0>,
   1651                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1652                                // Extra latency cycles since wbck is 7 cycles
   1653                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1654                                InstrStage<2, [A9_NPipe]>],
   1655                               [6, 2]>,
   1656   //
   1657   // Double-register FP Binary
   1658   // FIXME: We're using this itin for many instructions and [2, 2] here is too
   1659   // optimistic.
   1660   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1661                                InstrStage<1, [A9_MUX0], 0>,
   1662                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1663                                // Extra latency cycles since wbck is 6 cycles
   1664                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1665                                InstrStage<1, [A9_NPipe]>],
   1666                               [5, 2, 2]>,
   1667 
   1668   //
   1669   // VPADD, etc.
   1670   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1671                                InstrStage<1, [A9_MUX0], 0>,
   1672                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1673                                // Extra latency cycles since wbck is 6 cycles
   1674                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1675                                InstrStage<1, [A9_NPipe]>],
   1676                               [5, 1, 1]>,
   1677   //
   1678   // Double-register FP VMUL
   1679   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1680                                InstrStage<1, [A9_MUX0], 0>,
   1681                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1682                                // Extra latency cycles since wbck is 6 cycles
   1683                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1684                                InstrStage<1, [A9_NPipe]>],
   1685                               [5, 2, 1]>,
   1686   //
   1687   // Quad-register FP Binary
   1688   // Result written in N5, but that is relative to the last cycle of multicycle,
   1689   // so we use 6 for those cases
   1690   // FIXME: We're using this itin for many instructions and [2, 2] here is too
   1691   // optimistic.
   1692   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1693                                InstrStage<1, [A9_MUX0], 0>,
   1694                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1695                                // Extra latency cycles since wbck is 7 cycles
   1696                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1697                                InstrStage<2, [A9_NPipe]>],
   1698                               [6, 2, 2]>,
   1699   //
   1700   // Quad-register FP VMUL
   1701   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1702                                InstrStage<1, [A9_MUX0], 0>,
   1703                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1704                                // Extra latency cycles since wbck is 7 cycles
   1705                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1706                                InstrStage<1, [A9_NPipe]>],
   1707                               [6, 2, 1]>,
   1708   //
   1709   // Double-register FP Multiply-Accumulate
   1710   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1711                                InstrStage<1, [A9_MUX0], 0>,
   1712                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1713                                // Extra latency cycles since wbck is 7 cycles
   1714                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1715                                InstrStage<2, [A9_NPipe]>],
   1716                               [6, 3, 2, 1]>,
   1717   //
   1718   // Quad-register FP Multiply-Accumulate
   1719   // Result written in N9, but that is relative to the last cycle of multicycle,
   1720   // so we use 10 for those cases
   1721   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1722                                InstrStage<1, [A9_MUX0], 0>,
   1723                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1724                                // Extra latency cycles since wbck is 9 cycles
   1725                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
   1726                                InstrStage<4, [A9_NPipe]>],
   1727                               [8, 4, 2, 1]>,
   1728   //
   1729   // Double-register Fused FP Multiply-Accumulate
   1730   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1731                                InstrStage<1, [A9_MUX0], 0>,
   1732                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1733                                // Extra latency cycles since wbck is 7 cycles
   1734                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1735                                InstrStage<2, [A9_NPipe]>],
   1736                               [6, 3, 2, 1]>,
   1737   //
   1738   // Quad-register Fused FP Multiply-Accumulate
   1739   // Result written in N9, but that is relative to the last cycle of multicycle,
   1740   // so we use 10 for those cases
   1741   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1742                                InstrStage<1, [A9_MUX0], 0>,
   1743                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1744                                // Extra latency cycles since wbck is 9 cycles
   1745                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
   1746                                InstrStage<4, [A9_NPipe]>],
   1747                               [8, 4, 2, 1]>,
   1748   //
   1749   // Double-register Reciprocal Step
   1750   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1751                                InstrStage<1, [A9_MUX0], 0>,
   1752                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1753                                // Extra latency cycles since wbck is 10 cycles
   1754                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
   1755                                InstrStage<1, [A9_NPipe]>],
   1756                               [9, 2, 2]>,
   1757   //
   1758   // Quad-register Reciprocal Step
   1759   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1760                                InstrStage<1, [A9_MUX0], 0>,
   1761                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1762                                // Extra latency cycles since wbck is 11 cycles
   1763                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
   1764                                InstrStage<2, [A9_NPipe]>],
   1765                               [10, 2, 2]>,
   1766   //
   1767   // Double-register Permute
   1768   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1769                                InstrStage<1, [A9_MUX0], 0>,
   1770                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1771                                // Extra latency cycles since wbck is 6 cycles
   1772                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1773                                InstrStage<1, [A9_NPipe]>],
   1774                               [2, 2, 1, 1]>,
   1775   //
   1776   // Quad-register Permute
   1777   // Result written in N2, but that is relative to the last cycle of multicycle,
   1778   // so we use 3 for those cases
   1779   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1780                                InstrStage<1, [A9_MUX0], 0>,
   1781                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1782                                // Extra latency cycles since wbck is 7 cycles
   1783                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1784                                InstrStage<2, [A9_NPipe]>],
   1785                               [3, 3, 1, 1]>,
   1786   //
   1787   // Quad-register Permute (3 cycle issue)
   1788   // Result written in N2, but that is relative to the last cycle of multicycle,
   1789   // so we use 4 for those cases
   1790   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1791                                InstrStage<1, [A9_MUX0], 0>,
   1792                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1793                                // Extra latency cycles since wbck is 8 cycles
   1794                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
   1795                                InstrStage<3, [A9_NPipe]>],
   1796                               [4, 4, 1, 1]>,
   1797 
   1798   //
   1799   // Double-register VEXT
   1800   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1801                                InstrStage<1, [A9_MUX0], 0>,
   1802                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1803                                // Extra latency cycles since wbck is 6 cycles
   1804                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
   1805                                InstrStage<1, [A9_NPipe]>],
   1806                               [2, 1, 1]>,
   1807   //
   1808   // Quad-register VEXT
   1809   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1810                                InstrStage<1, [A9_MUX0], 0>,
   1811                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1812                                // Extra latency cycles since wbck is 7 cycles
   1813                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1814                                InstrStage<2, [A9_NPipe]>],
   1815                               [3, 1, 2]>,
   1816   //
   1817   // VTB
   1818   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1819                                InstrStage<1, [A9_MUX0], 0>,
   1820                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1821                                // Extra latency cycles since wbck is 7 cycles
   1822                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1823                                InstrStage<2, [A9_NPipe]>],
   1824                               [3, 2, 1]>,
   1825   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1826                                InstrStage<1, [A9_MUX0], 0>,
   1827                                InstrStage<2, [A9_DRegsN],   0, Required>,
   1828                                // Extra latency cycles since wbck is 7 cycles
   1829                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1830                                InstrStage<2, [A9_NPipe]>],
   1831                               [3, 2, 2, 1]>,
   1832   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1833                                InstrStage<1, [A9_MUX0], 0>,
   1834                                InstrStage<2, [A9_DRegsN],   0, Required>,
   1835                                // Extra latency cycles since wbck is 8 cycles
   1836                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
   1837                                InstrStage<3, [A9_NPipe]>],
   1838                               [4, 2, 2, 3, 1]>,
   1839   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1840                                InstrStage<1, [A9_MUX0], 0>,
   1841                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1842                                // Extra latency cycles since wbck is 8 cycles
   1843                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
   1844                                InstrStage<3, [A9_NPipe]>],
   1845                               [4, 2, 2, 3, 3, 1]>,
   1846   //
   1847   // VTBX
   1848   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1849                                InstrStage<1, [A9_MUX0], 0>,
   1850                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1851                                // Extra latency cycles since wbck is 7 cycles
   1852                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1853                                InstrStage<2, [A9_NPipe]>],
   1854                               [3, 1, 2, 1]>,
   1855   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1856                                InstrStage<1, [A9_MUX0], 0>,
   1857                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1858                                // Extra latency cycles since wbck is 7 cycles
   1859                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
   1860                                InstrStage<2, [A9_NPipe]>],
   1861                               [3, 1, 2, 2, 1]>,
   1862   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1863                                InstrStage<1, [A9_MUX0], 0>,
   1864                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1865                                // Extra latency cycles since wbck is 8 cycles
   1866                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
   1867                                InstrStage<3, [A9_NPipe]>],
   1868                               [4, 1, 2, 2, 3, 1]>,
   1869   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
   1870                                InstrStage<1, [A9_MUX0], 0>,
   1871                                InstrStage<1, [A9_DRegsN],   0, Required>,
   1872                                // Extra latency cycles since wbck is 8 cycles
   1873                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
   1874                                InstrStage<2, [A9_NPipe]>],
   1875                               [4, 1, 2, 2, 3, 3, 1]>
   1876 ]>;
   1877 
   1878 // ===---------------------------------------------------------------------===//
   1879 // The following definitions describe the simpler per-operand machine model.
   1880 // This works with MachineScheduler and will eventually replace itineraries.
   1881 
   1882 class A9WriteLMOpsListType<list<WriteSequence> writes> {
          // Stores the template argument in a named field so that slices of the
          // list (e.g. A9WriteLMOpsList.Writes[0-3]) can be referenced elsewhere.
   1883   list <WriteSequence> Writes = writes;
          // Left uninitialized (?) here; presumably supplied by an enclosing
          // `let SchedModel = ... in` at the point of definition -- TODO confirm.
   1884   SchedMachineModel SchedModel = ?;
   1885 }
   1886 
   1887 // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
        // Sets the top-level model parameters and ties the model to the legacy
        // CortexA9Itineraries table.
   1888 def CortexA9Model : SchedMachineModel {
   1889   let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
   1890   let MicroOpBufferSize = 56; // Based on available renamed registers.
   1891   let LoadLatency = 2; // Optimistic load latency assuming bypass.
   1892                        // This is overridden by OperandCycles if the
   1893                        // Itineraries are queried instead.
   1894   let MispredictPenalty = 8; // Based on estimate of pipeline depth.
   1895 
          // Attach the legacy itinerary data to this model.
   1896   let Itineraries = CortexA9Itineraries;
   1897 
   1898   // FIXME: Many vector operations were never given an itinerary. We
   1899   // haven't mapped these to the new model either.
          // Hence the model is explicitly marked as incomplete.
   1900   let CompleteModel = 0;
   1901 }
   1902 
   1903 //===----------------------------------------------------------------------===//
   1904 // Define each kind of processor resource and number available.
   1905 //
   1906 // The AGU unit has BufferSize=1 so that the latency between operations
   1907 // that use it are considered to stall other operations.
   1908 //
   1909 // The FP unit has BufferSize=0 so that it is a hard dispatch
   1910 // hazard. No instruction may be dispatched while the unit is reserved.
   1911 
   1912 let SchedModel = CortexA9Model in {
   1913 
   1914 def A9UnitALU : ProcResource<2>; // Two ALU units are available.
        // The single multiply unit names A9UnitALU as its Super resource.
   1915 def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
        // BufferSize = 1: see the note above on AGU latency stalling other ops.
   1916 def A9UnitAGU : ProcResource<1> { let BufferSize = 1; }
   1917 def A9UnitLS  : ProcResource<1>; // One load/store unit.
        // BufferSize = 0: the FP unit is a hard dispatch hazard (see note above).
   1918 def A9UnitFP  : ProcResource<1> { let BufferSize = 0; }
   1919 def A9UnitB   : ProcResource<1>; // One unit, used by A9WriteB (branches).
   1920 
   1921 //===----------------------------------------------------------------------===//
   1922 // Define scheduler read/write types with their resources and latency on A9.
   1923 
   1924 // Consume an issue slot, but no processor resources. This is useful when all
   1925 // other writes associated with the operand have NumMicroOps = 0.
   1926 def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
   1927 
   1928 // Write an integer register.
        // No Latency override, so the SchedWriteRes default applies.
   1929 def A9WriteI : SchedWriteRes<[A9UnitALU]>;
   1930 // Write an integer shifted-by register
   1931 def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
   1932 
   1933 // Basic ALU.
        // No Latency override, so the SchedWriteRes default applies.
   1934 def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
   1935 // ALU with operand shifted by immediate.
        // Unlike its neighbours this is an anonymous WriteRes that binds resources
        // directly to the WriteALUsi write type instead of defining a new A9 write.
   1936 def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
   1937 // ALU with operand shifted by register.
   1938 def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
   1939 
   1940 // Multiplication
        // A9WriteM lists A9UnitMul twice in its resource list. The *Hi variants
        // have NumMicroOps = 0 and one extra cycle of latency over their
        // counterparts (5 vs 4, and 4 vs 3).
   1941 def A9WriteM   : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
   1942 def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
   1943                                               let NumMicroOps = 0; }
   1944 def A9WriteM16   : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
   1945 def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
   1946                                                 let NumMicroOps = 0; }
   1947 
   1948 // Floating-point
   1949 // Only one FP or AGU instruction may issue per cycle. We model this
   1950 // by having FP instructions consume the AGU resource.
        // All writes below use the same resource pair [A9UnitFP, A9UnitAGU] and
        // differ only in Latency.
   1951 def A9WriteF      : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
   1952 def A9WriteFMov   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
   1953 def A9WriteFMulS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
   1954 def A9WriteFMulD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
   1955 def A9WriteFMAS   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
   1956 def A9WriteFMAD   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
   1957 def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
   1958 def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
   1959 def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
   1960 def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
   1961 
   1962 // NEON has an odd mix of latencies. Simply name the write types by latency.
        // The numeric suffix equals the Latency value. Note that this list has no
        // A9WriteV8 entry (it goes from V7 to V9).
   1963 def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
   1964 def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
   1965 def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
   1966 def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
   1967 def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
   1968 def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
   1969 def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
   1970 def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
   1971 def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
   1972 
   1973 // Reserve A9UnitFP for 2 consecutive cycles.
        // ResourceCycles has one entry per listed resource, in the same order:
        // A9UnitFP is held for 2 cycles and A9UnitAGU for 1. The AGU entry is
        // written out explicitly so the list length matches the resource list
        // instead of relying on a short list being padded with a default.
        // The numeric suffix of each name (V4/V7/V9) equals its Latency.
   1974 def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
   1975   let Latency = 4;
   1976   let ResourceCycles = [2, 1];
   1977 }
   1978 def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
   1979   let Latency = 7;
   1980   let ResourceCycles = [2, 1];
   1981 }
   1982 def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
   1983   let Latency = 9;
   1984   let ResourceCycles = [2, 1];
   1985 }
   1986 
   1987 // Branches don't have a def operand but still consume resources.
   1988 def A9WriteB : SchedWriteRes<[A9UnitB]>;
   1989 
   1990 // Address generation.
        // Uses the AGU but contributes no micro-ops of its own.
   1991 def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
   1992 
   1993 // Load Integer.
   1994 def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
   1995 // Load the upper 32-bits using the same micro-op.
        // Same latency as A9WriteL, but no resources and no micro-ops.
   1996 def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
   1997                                      let NumMicroOps = 0; }
   1998 // Offset shifted by register.
        // NOTE(review): elsewhere in this file the "si" suffix (WriteALUsi) is
        // described as shift-by-immediate and "sr" as shift-by-register; confirm
        // whether "register" is the intended wording here.
   1999 def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
   2000 // Load (and zero extend) a byte.
        // One cycle more than A9WriteL; the "si" form adds one more cycle, as it
        // does for A9WriteLsi.
   2001 def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
   2002 def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
   2003 
   2004 // Load or Store Float, aligned.
        // Consumes both the load/store unit and the FP unit.
   2005 def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
   2006 
   2007 // Store Integer.
        // No Latency override, so the SchedWriteRes default applies.
   2008 def A9WriteS : SchedWriteRes<[A9UnitLS]>;
   2009 
   2010 //===----------------------------------------------------------------------===//
   2011 // Define resources dynamically for load multiple variants.
   2012 
   2013 // Define helpers for extra latency without consuming resources.
        // A9WriteCycle1 is the primitive: 1 cycle, no resources, no micro-ops.
   2014 def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
        // A9WriteCycle2..8 repeat A9WriteCycle1 NumCycles times.
   2015 foreach NumCycles = 2-8 in {
   2016 def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
   2017 } // foreach NumCycles
   2018 
   2019 // Define address generation sequences and predicates for 8 flavors of LDMs.
   2020 foreach NumAddr = 1-8 in {
   2021 
   2022 // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
   2023 // latency for instructions that generate multiple loads or stores.
   2024 def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
   2025 
   2026 // Define a predicate to select the LDM based on number of memory addresses.
        // The predicate compares (count + 1) / 2 against NumAddr, i.e. the address
        // count is grouped into pairs, rounding up (assuming integer division in
        // the generated C++ -- TODO confirm the return type of getNumLDMAddresses).
        // With NumAddr in 1-8 this covers counts 1 through 16; a count of 0 is
        // handled separately by A9LMUnknownPred.
   2027 def A9LMAdr#NumAddr#Pred :
   2028   SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;
   2029 
   2030 } // foreach NumAddr
   2031 
   2032 // Fall-back for unknown LDMs.
        // Matches when getNumLDMAddresses reports 0 addresses.
   2033 def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
   2034 
   2035 // LDM/VLDM/VLDn address generation latency & resources.
   2036 // Dynamically select the A9WriteAdrN sequence using a predicate.
        // Each A9LMAdrNPred maps to the A9WriteAdrN sequence with the same N.
   2037 def A9WriteLMAdr : SchedWriteVariant<[
   2038   SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
   2039   SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
   2040   SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
   2041   SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
   2042   SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
   2043   SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
   2044   SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
   2045   SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
   2046   // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
   2047   SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
   2048 
   2049 // Define LDM Resources.
   2050 // These take no issue resource, so they can be combined with other
   2051 // writes like WriteB.
   2052 // A9WriteLMLo takes a single LS resource and 2 cycles.
   2053 def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
   2054                                               let NumMicroOps = 0; }
   2055 // Assuming aligned access, the upper half of each pair is free with
   2056 // the same latency.
   2057 def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
   2058                                       let NumMicroOps = 0; }
   2059 // Each A9WriteL#N variant adds N cycles of latency without consuming
   2060 // additional resources.
        // A9WriteL<N> is A9WriteLMLo followed by A9WriteCycle<N>; A9WriteL<N>Hi is
        // A9WriteLMHi followed by A9WriteCycle<N>. The !cast looks the helper up
        // by its generated name.
   2061 foreach NumAddr = 1-8 in {
   2062 def A9WriteL#NumAddr : WriteSequence<
   2063   [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
   2064 def A9WriteL#NumAddr#Hi : WriteSequence<
   2065   [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
   2066 }
   2067 
   2068 //===----------------------------------------------------------------------===//
   2069 // LDM: Load multiple into 32-bit integer registers.
   2070 
   2071 def A9WriteLMOpsList : A9WriteLMOpsListType<
                         // Ordered as (Lo, Hi) pairs for N = 1..8, so the slice
                         // Writes[0-(2N-1)] yields the first N pairs.
   2072                  [A9WriteL1, A9WriteL1Hi,
   2073                   A9WriteL2, A9WriteL2Hi,
   2074                   A9WriteL3, A9WriteL3Hi,
   2075                   A9WriteL4, A9WriteL4Hi,
   2076                   A9WriteL5, A9WriteL5Hi,
   2077                   A9WriteL6, A9WriteL6Hi,
   2078                   A9WriteL7, A9WriteL7Hi,
   2079                   A9WriteL8, A9WriteL8Hi]>;
   2080 
   2081 // A9WriteLM variants expand into a pair of writes for each 64-bit
   2082 // value loaded. When the number of registers is odd, the last
   2083 // A9WriteLnHi is naturally ignored because the instruction has no
   2084 // following def operands.  These variants take no issue resource, so
   2085 // they may need to be part of a WriteSequence that includes A9WriteIssue.
        // For A9LMAdrNPred the variant uses the first 2*N entries of
        // A9WriteLMOpsList.Writes.
   2086 def A9WriteLM : SchedWriteVariant<[
   2087   SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
   2088   SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
   2089   SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
   2090   SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
   2091   SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
   2092   SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
   2093   SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
   2094   SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
   2095   // For unknown LDMs, define the maximum number of writes, but only
   2096   // make the first two consume resources.
          // From the third pair onward both positions use the Hi form, which is
          // built on A9WriteLMHi and therefore lists no processor resources.
   2097   SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
   2098                              A9WriteL2, A9WriteL2Hi,
   2099                              A9WriteL3Hi, A9WriteL3Hi,
   2100                              A9WriteL4Hi, A9WriteL4Hi,
   2101                              A9WriteL5Hi, A9WriteL5Hi,
   2102                              A9WriteL6Hi, A9WriteL6Hi,
   2103                              A9WriteL7Hi, A9WriteL7Hi,
   2104                              A9WriteL8Hi, A9WriteL8Hi]>]> {
   2105   let Variadic = 1;
   2106 }
   2107 
   2108 //===----------------------------------------------------------------------===//
   2109 // VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
   2110 
   2111 // A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
   2112 // so it can be used in WriteSequences for single-issue instructions that
   2113 // encapsulate multiple loads.
   2114 def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
   2115   let Latency = 1;
   2116   let NumMicroOps = 0;
   2117 }
   2118 
   2119 foreach NumAddr = 1-8 in {
   2120 
   2121 // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
   2122 def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
   2123 
   2124 // A9WriteLfp1-8 definitions are statically expanded into a sequence of
   2125 // A9WriteLfpOps with additive latency that takes a single issue slot.
   2126 // Used directly to describe NEON VLDn.
   2127 def A9WriteLfp#NumAddr : WriteSequence<
   2128   [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
   2129 
   2130 // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
   2131 // permuting loaded values.
   2132 def A9WriteLfp#NumAddr#Mov : WriteSequence<
   2133   [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
   2134 
   2135 } // foreach NumAddr
   2136 
   2137 // Define VLDM/VSTM PreRA resources.
   2138 // A9WriteLMfpPreRA are dynamically expanded into the correct
   2139 // A9WriteLfp1-8 sequence based on a predicate. This supports the
   2140 // preRA VLDM variants in which all 64-bit loads are written to the
   2141 // same tuple of either single or double precision registers.
   2142 def A9WriteLMfpPreRA : SchedWriteVariant<[
   2143   SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
   2144   SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
   2145   SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
   2146   SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
   2147   SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
   2148   SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
   2149   SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
   2150   SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
   2151   // For unknown VLDM/VSTM PreRA, assume 2xS registers.
   2152   SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
   2153 
   2154 // Define VLDM/VSTM PostRA Resources.
   2155 // A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
   2156 def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
   2157 
   2158 foreach NumAddr = 1-8 in {
   2159 
   2160 // Each A9WriteLMfp#N variant adds N cycles of latency without consuming
   2161 // additional resources.
   2162 def A9WriteLMfp#NumAddr : WriteSequence<
   2163   [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
   2164 
   2165 // Assuming aligned access, the upper half of each pair is free with
   2166 // the same latency.
   2167 def A9WriteLMfp#NumAddr#Hi : WriteSequence<
   2168   [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
   2169 
   2170 } // foreach NumAddr
   2171 
   2172 // VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
   2173 // pair of writes for each 64-bit value loaded. When the number of
   2174 // registers is odd, the last A9WriteLMfpnHi is naturally ignored because
   2175 // the instruction has no following def operands.
   2176 
   2177 def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
   2178                  [A9WriteLMfp1, A9WriteLMfp2,       // 0-1
   2179                   A9WriteLMfp3, A9WriteLMfp4,       // 2-3
   2180                   A9WriteLMfp5, A9WriteLMfp6,       // 4-5
   2181                   A9WriteLMfp7, A9WriteLMfp8,       // 6-7
   2182                   A9WriteLMfp1Hi,                   // 8-8
   2183                   A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
   2184                   A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
   2185                   A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
   2186                   A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
   2187                   A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
   2188                   A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
   2189                   A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
   2190 
   2191 def A9WriteLMfpPostRA : SchedWriteVariant<[
          // The variant guarded by A9LMAdr<N>Pred takes two slices of
          // A9WriteLMfpPostRAOpsList: indices 0..N-1 (A9WriteLMfp1..N) and
          // indices N+7..2N+6 (N Hi entries ending at A9WriteLMfp<N>Hi),
          // for 2*N writes in total.
   2192   SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
   2193   SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
   2194   SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
   2195   SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
   2196   SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
   2197   SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
   2198   SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
   2199   SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
   2200   // For unknown VLDMs, define the maximum number of writes, but only
   2201   // make the first two consume resources. We are optimizing for the case
   2202   // where the operands are DPRs, and this determines the first eight
   2203   // types. The remaining eight types are filled to cover the case
   2204   // where the operands are SPRs.
   2205   SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
   2206                              A9WriteLMfp3Hi, A9WriteLMfp4Hi,
   2207                              A9WriteLMfp5Hi, A9WriteLMfp6Hi,
   2208                              A9WriteLMfp7Hi, A9WriteLMfp8Hi,
   2209                              A9WriteLMfp5Hi, A9WriteLMfp5Hi,
   2210                              A9WriteLMfp6Hi, A9WriteLMfp6Hi,
   2211                              A9WriteLMfp7Hi, A9WriteLMfp7Hi,
   2212                              A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
   2213   let Variadic = 1;
   2214 }
   2215 
   2216 // Distinguish between our multiple MI-level forms of the same
   2217 // VLDM/VSTM instructions by inspecting the register in operand 0:
        // A9PreRA holds when it is a virtual register, A9PostRA when it is a
        // physical register.
   2218 def A9PreRA : SchedPredicate<
   2219   "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
   2220 def A9PostRA : SchedPredicate<
   2221   "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
   2222 
   2223 // VLDM represents all destination registers as a single register
   2224 // tuple, unlike LDM. So the number of write operands is not variadic.
   2225 def A9WriteLMfp : SchedWriteVariant<[
   2226   SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
   2227   SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
   2228 
   2229 //===----------------------------------------------------------------------===//
   2230 // Resources for other (non-LDM/VLDM) Variants.
   2231 
   2232 // These mov immediate writers are unconditionally expanded with
   2233 // additive latency.
   2234 def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
   2235 def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
   2236 def A9WriteI2ld  : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
   2237 
   2238 // Some ALU operations can read loaded integer values one cycle early.
   2239 def A9ReadALU : SchedReadAdvance<1,
   2240   [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
   2241    A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
   2242    A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
   2243    A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
   2244    A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
   2245 
   2246 // Read types for operands that are unconditionally read in cycle N
   2247 // after the instruction issues, decreases producer latency by N-1.
   2248 def A9Read2 : SchedReadAdvance<1>;
   2249 def A9Read3 : SchedReadAdvance<2>;
   2250 def A9Read4 : SchedReadAdvance<3>;
   2251 
   2252 //===----------------------------------------------------------------------===//
   2253 // Map itinerary classes to scheduler read/write resources per operand.
   2254 //
   2255 // For ARM, we piggyback scheduler resources on the Itinerary classes
   2256 // to avoid perturbing the existing instruction definitions.
   2257 
   2258 // This table follows the ARM Cortex-A9 Technical Reference Manuals,
   2259 // mostly in order.
   2260 
   2261 def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
   2262                          IIC_iMVNi,IIC_iMVNsi,
   2263                          IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
   2264 def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
   2265 def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
   2266 
   2267 def :ItinRW<[A9WriteI2],   [IIC_iMOVix2,IIC_iCMOVix2]>;
   2268 def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
   2269 def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
   2270 
   2271 def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
   2272 def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
   2273 def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
   2274 def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
   2275 def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
   2276 def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
   2277 def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
   2278 def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
   2279 
   2280 // A9WriteMHi is ignored for MUL32.
   2281 def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
   2282                                      IIC_iMUL64,IIC_iMAC64]>;
   2283 // FIXME: SMLALxx needs itin classes
   2284 def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
   2285 
   2286 // TODO: For floating-point ops, we model the pipeline forwarding
   2287 // latencies here. WAW latencies are sometimes longer.
   2288 
   2289 def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
   2290                             IIC_fpUNA32, IIC_fpUNA64,
   2291                             IIC_fpCMP32, IIC_fpCMP64]>;
   2292 def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
   2293 def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
   2294                          IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
   2295                          IIC_fpALU32, IIC_fpALU64]>;
   2296 def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
   2297 def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
   2298 def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
   2299 def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
   2300 def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
   2301 def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
   2302 def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
   2303 def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
   2304 
   2305 def :ItinRW<[A9WriteB], [IIC_Br]>;
   2306 
   2307 // A9 PLD is processed in a dedicated unit.
   2308 def :ItinRW<[], [IIC_Preload]>;
   2309 
   2310 // Note: We must assume that loads are aligned, since the machine
   2311 // model cannot know this statically and A9 ignores alignment hints.
   2312 
   2313 // A9WriteAdr consumes AGU regardless of address writeback. But its
   2314 // latency is only relevant for users of an updated address.
   2315 def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
   2316                                      IIC_iLoad_iu,IIC_iLoad_ru]>;
   2317 def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
   2318 def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
   2319                                        IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
   2320 def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
   2321 def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
   2322                                             IIC_iLoad_d_ru]>;
   2323 // Store either has no def operands, or the one def for address writeback.
   2324 def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
   2325                                      IIC_iStore_iu, IIC_iStore_ru,
   2326                                      IIC_iStore_d_i, IIC_iStore_d_r,
   2327                                      IIC_iStore_d_ru]>;
   2328 def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
   2329                                       IIC_iStore_bh_i, IIC_iStore_bh_r,
   2330                                       IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
   2331 def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
   2332 
   2333 // A9WriteLM will be expanded into a separate write for each def
   2334 // operand. Address generation consumes resources, but A9WriteLMAdr
   2335 // is listed after all def operands, so has no effective latency.
   2336 //
   2337 // Note: A9WriteLM expands into an even number of def operands. The
   2338 // actual number of def operands may be less by one.
   2339 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
   2340 
   2341 // Load multiple with address writeback has an extra def operand in
   2342 // front of the loaded registers.
   2343 //
   2344 // Reuse the load-multiple variants for store-multiple because the
   2345 // resources are identical. For stores only the address writeback
   2346 // has a def operand so the WriteL latencies are unused.
   2347 def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
   2348                                                       IIC_iStore_m,
   2349                                                       IIC_iStore_mu]>;
   2350 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
   2351 def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
   2352 
   2353 def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
   2354 
   2355 def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
   2356 def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
   2357 def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
   2358                                         IIC_fpStore_m, IIC_fpStore_mu]>;
   2359 
   2360 // Note: Unlike VLDM, VLD1 expects the writeback operand after the
   2361 // normal writes.
   2362 def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
   2363                                          IIC_VLD1x2, IIC_VLD1x2u]>;
   2364 def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
   2365                                          IIC_VLD1x4, IIC_VLD1x4u,
   2366                                          IIC_VLD4dup, IIC_VLD4dupu]>;
   2367 def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
   2368                                             IIC_VLD2, IIC_VLD2u,
   2369                                             IIC_VLD2dup, IIC_VLD2dupu]>;
   2370 def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
   2371                                             IIC_VLD2x2, IIC_VLD2x2u,
   2372                                             IIC_VLD2ln, IIC_VLD2lnu]>;
   2373 def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
   2374                                             IIC_VLD3dup, IIC_VLD3dupu]>;
   2375 def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
   2376                                             IIC_VLD4ln, IIC_VLD4lnu]>;
   2377 def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
   2378 
   2379 // Vector stores use similar resources to vector loads, so use the
   2380 // same write types. The address write must be first for stores with
   2381 // address writeback.
   2382 def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
   2383                                          IIC_VST1x2, IIC_VST1x2u,
   2384                                          IIC_VST1ln, IIC_VST1lnu,
   2385                                          IIC_VST2, IIC_VST2u,
   2386                                          IIC_VST2x2, IIC_VST2x2u,
   2387                                          IIC_VST2ln, IIC_VST2lnu]>;
   2388 def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
   2389                                          IIC_VST1x4, IIC_VST1x4u,
   2390                                          IIC_VST3, IIC_VST3u,
   2391                                          IIC_VST3ln, IIC_VST3lnu,
   2392                                          IIC_VST4, IIC_VST4u,
   2393                                          IIC_VST4ln, IIC_VST4lnu]>;
   2394 
   2395 // NEON moves.
   2396 def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
   2397 def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
   2398 def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
   2399 
   2400 // NEON integer arithmetic
   2401 //
   2402 // VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
   2403 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
   2404 // VSUB/VMVN/VCLSD/VCLZD/VCNTD
   2405 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
   2406 // VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
   2407 // ...
   2408 // VHADD/VRHADD/VQADD/VTST/VADH/VRADH
   2409 def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
   2410 
   2411 // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
   2412 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
   2413 // VQNEG/VQABS
   2414 def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
   2415 // VABS
   2416 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
   2417 // VPADD/VPADDL are mapped later under IIC_SHLi.
   2418 // ...
   2419 // VCLSQ/VCLZQ/VCNTQ, takes two cycles.
   2420 def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
   2421 // VMOVimm/VMVNimm/VORRimm/VBICimm
   2422 def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
   2423 def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
   2424 def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
   2425 
   2426 // NEON integer multiply
   2427 //
   2428 // Note: these don't quite match the timing docs, but they do match
   2429 // the original A9 itinerary.
   2430 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
   2431 def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
   2432 def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
   2433 def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
   2434 def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
   2435 def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
   2436 def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
   2437 def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
   2438 
   2439 // NEON integer shift
   2440 // TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
   2441 def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
   2442 def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
   2443 
   2444 // NEON permute
   2445 def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
   2446 def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
   2447             [IIC_VPERMQ3, IIC_VEXTQ]>;
   2448 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
   2449 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
   2450 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
   2451 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
   2452 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
   2453 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
   2454 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
   2455 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
   2456             [IIC_VTBX4]>;
   2457 
   2458 // NEON floating-point
   2459 def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
   2460 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
   2461 def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
   2462 def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
   2463 def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
   2464 def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
   2465 def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
   2466 def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
   2467 
   2468 // Map SchedRWs that are identical for cortexa9 to existing resources.
   2469 def : SchedAlias<WriteALU, A9WriteALU>;
   2470 def : SchedAlias<WriteALUsr, A9WriteALUsr>;
   2471 def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
   2472 def : SchedAlias<ReadALU, A9ReadALU>;
   2473 def : SchedAlias<ReadALUsr, A9ReadALU>;
   2474 def : InstRW< [WriteALU],
   2475       (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
   2476                  "BICrr")>;
   2477 def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
   2478 def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
   2479 
   2480 
   2481 def : SchedAlias<WriteCMP, A9WriteALU>;
   2482 def : SchedAlias<WriteCMPsi, A9WriteALU>;
   2483 def : SchedAlias<WriteCMPsr, A9WriteALU>;
   2484 
   2485 def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
   2486                                        "MOVCCsr")>;
   2487 def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
   2488 def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
   2489                                       "MOV_ga_dyn")>;
   2490 def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
   2491 def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
   2492 
   2493 def : InstRW< [WriteALU], (instregex "SEL")>;
   2494 
   2495 def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
   2496 
   2497 def : InstRW< [A9WriteM],
   2498       (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
   2499       "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
   2500 def : InstRW< [A9WriteM, A9WriteMHi],
   2501       (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
   2502       "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
   2503       "SMLALTT")>;
   2504 // FIXME: These instructions used to have NoItinerary. Just copied the one from above.
   2505 def : InstRW< [A9WriteM, A9WriteMHi],
   2506       (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
   2507       "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
   2508 
   2509 def : InstRW<[A9WriteM16, A9WriteM16Hi],
   2510       (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
   2511 def : InstRW<[A9WriteM16, A9WriteM16Hi],
   2512       (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
   2513 
   2514 def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
   2515 def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
   2516 def : InstRW<[A9WriteLb],
   2517       (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
   2518       "LDRH", "LDRSH", "LDRSB")>;
        // Register-shifted byte load (cf. the IIC_iLoad_bh_si mapping); plain
        // LDRrs is already mapped to A9WriteLsi above.
   2519 def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>;
   2520 
   2521 def : WriteRes<WriteDiv, []> { let Latency = 0; }
   2522 
   2523 def : WriteRes<WriteBr, [A9UnitB]>;
   2524 def : WriteRes<WriteBrL, [A9UnitB]>;
   2525 def : WriteRes<WriteBrTbl, [A9UnitB]>;
   2526 def : WriteRes<WritePreLd, []>;
   2527 def : SchedAlias<WriteCvtFP, A9WriteF>;
   2528 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
   2529 } // SchedModel = CortexA9Model
   2530