Home | History | Annotate | Download | only in ARM
      1 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
      10 // This file defines the itinerary class data for the Swift processor.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 // ===---------------------------------------------------------------------===//
     15 // This section contains legacy support for itineraries. This is
     16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
     17 
      18 def SW_DIS0 : FuncUnit; // SW_DIS0-2: units named in the leading stage(s) of the itineraries below
      19 def SW_DIS1 : FuncUnit; // (presumably the three dispatch slots -- TODO confirm)
      20 def SW_DIS2 : FuncUnit;
      21 
      22 def SW_ALU0 : FuncUnit; // ALU unit 0; e.g. IIC_iMUL16 / IIC_iMUL32 use only this ALU unit
      23 def SW_ALU1 : FuncUnit; // ALU unit 1; e.g. the IIC_fpCVT* entries use only this ALU unit
      24 def SW_LS   : FuncUnit; // unit occupied by the load and store itinerary stages
      25 def SW_IDIV : FuncUnit; // unit held for 14 cycles by IIC_iDIV
      26 def SW_FDIV : FuncUnit; // presumably the FP divide / sqrt unit -- TODO confirm
     27 
     28 // FIXME: Need bypasses.
     29 // FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
     30 //        IIC_iMOVix2ld better.
     31 // FIXME: Model the special immediate shifts that are not microcoded.
     32 // FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
     33 //        to issue on pipe 1?
     34 // FIXME: Model the pipelined behavior of CMP / TST instructions.
     35 // FIXME: Better model the microcode stages of multiply instructions, especially
     36 //        conditional variants.
     37 // FIXME: Add preload instruction when it is documented.
     38 // FIXME: Model non-pipelined nature of FP div / sqrt unit.
     39 
     40 def SwiftItineraries : ProcessorItineraries<
     41   [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
     42   //
     43   // Move instructions, unconditional
     44   InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     45                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     46                               [1]>,
     47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     48                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     49                               [1]>,
     50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     51                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     52                               [1]>,
     53   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     54                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     55                               [1]>,
     56   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     57                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     58                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     59                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     60                               [2]>,
     61   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     62                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
     63                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
     64                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
     65                                  [3]>,
     66   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     67                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     68                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     69                                InstrStage<1, [SW_LS]>],
     70                               [5]>,
     71   //
     72   // MVN instructions
     73   InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     74                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     75                               [1]>,
     76   InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     77                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     78                               [1]>,
     79   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     80                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     81                               [1]>,
     82   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     83                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     84                               [1]>,
     85   //
     86   // No operand cycles
     87   InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     88                                InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
     89   //
     90   // Binary Instructions that produce a result
     91   InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     92                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     93                             [1, 1]>,
     94   InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     95                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     96                             [1, 1, 1]>,
     97   InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     98                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     99                             [2, 1, 1]>,
    100   InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    101                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    102                             [2, 1, 1]>,
    103   InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    104                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    105                             [2, 1, 1, 1]>,
    106   //
    107   // Bitwise Instructions that produce a result
    108   InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    109                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    110                             [1, 1]>,
    111   InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    112                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    113                             [1, 1, 1]>,
    114   InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    115                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    116                             [2, 1, 1]>,
    117   InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    118                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    119                             [2, 1, 1, 1]>,
    120   //
    121   // Unary Instructions that produce a result
    122 
    123   // CLZ, RBIT, etc.
    124   InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    125                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    126                             [1, 1]>,
    127 
    128   // BFC, BFI, UBFX, SBFX
    129   InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    130                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    131                             [2, 1]>,
    132 
    133   //
    134   // Zero and sign extension instructions
    135   InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    136                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    137                             [1, 1]>,
    138   InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    139                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    140                             [1, 1, 1]>,
    141   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    142                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    143                             [1, 1, 1, 1]>,
    144   //
    145   // Compare instructions
    146   InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    147                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    148                               [1]>,
    149   InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    150                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    151                               [1, 1]>,
    152   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    153                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    154                               [1, 1]>,
    155   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    156                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    157                               [1, 1, 1]>,
    158   //
    159   // Test instructions
    160   InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    161                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    162                               [1]>,
    163   InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    164                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    165                               [1, 1]>,
    166   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    167                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    168                               [1, 1]>,
    169   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    170                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    171                               [1, 1, 1]>,
    172   //
    173   // Move instructions, conditional
    174   // FIXME: Correctly model the extra input dep on the destination.
    175   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    176                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    177                               [1]>,
    178   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    179                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    180                               [1, 1]>,
    181   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    182                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    183                               [1, 1]>,
    184   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    185                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    186                               [2, 1, 1]>,
    187   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    188                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    189                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
    190                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    191                               [2]>,
    192 
    193   // Integer multiply pipeline
    194   //
    195   InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    196                                InstrStage<1, [SW_ALU0]>],
    197                               [3, 1, 1]>,
    198   InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    199                                InstrStage<1, [SW_ALU0]>],
    200                               [3, 1, 1, 1]>,
    201   InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    202                                InstrStage<1, [SW_ALU0]>],
    203                               [4, 1, 1]>,
    204   InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    205                                InstrStage<1, [SW_ALU0]>],
    206                               [4, 1, 1, 1]>,
    207   InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
    208                                InstrStage<1, [SW_DIS1], 0>,
    209                                InstrStage<1, [SW_DIS2], 0>,
    210                                InstrStage<1, [SW_ALU0], 1>,
    211                                InstrStage<1, [SW_ALU0], 3>,
    212                                InstrStage<1, [SW_ALU0]>],
    213                               [5, 5, 1, 1]>,
    214   InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
    215                                InstrStage<1, [SW_DIS1], 0>,
    216                                InstrStage<1, [SW_DIS2], 0>,
    217                                InstrStage<1, [SW_ALU0], 1>,
    218                                InstrStage<1, [SW_ALU0], 1>,
    219                                InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
    220                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    221                               [5, 6, 1, 1]>,
    222   //
    223   // Integer divide
    224   InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    225                              InstrStage<1, [SW_ALU0], 0>,
    226                              InstrStage<14, [SW_IDIV]>],
    227                             [14, 1, 1]>,
    228 
    229   // Integer load pipeline
    230   // FIXME: The timings are some rough approximations
    231   //
    232   // Immediate offset
    233   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    234                                  InstrStage<1, [SW_LS]>],
    235                                 [3, 1]>,
    236   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    237                                  InstrStage<1, [SW_LS]>],
    238                                 [3, 1]>,
    239   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
    240                                  InstrStage<1, [SW_DIS1], 0>,
    241                                  InstrStage<1, [SW_LS], 1>,
    242                                  InstrStage<1, [SW_LS]>],
    243                                 [3, 4, 1]>,
    244   //
    245   // Register offset
    246   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    247                                  InstrStage<1, [SW_LS]>],
    248                                 [3, 1, 1]>,
    249   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    250                                  InstrStage<1, [SW_LS]>],
    251                                 [3, 1, 1]>,
    252   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
    253                                  InstrStage<1, [SW_DIS1], 0>,
    254                                  InstrStage<1, [SW_DIS2], 0>,
    255                                  InstrStage<1, [SW_LS], 1>,
    256                                  InstrStage<1, [SW_LS], 3>,
    257                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    258                                 [3, 4, 1, 1]>,
    259   //
    260   // Scaled register offset
    261   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
    262                                  InstrStage<1, [SW_DIS1], 0>,
    263                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    264                                  InstrStage<1, [SW_LS]>],
    265                                 [5, 1, 1]>,
    266   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
    267                                  InstrStage<1, [SW_DIS1], 0>,
    268                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    269                                  InstrStage<1, [SW_LS]>],
    270                                 [5, 1, 1]>,
    271   //
    272   // Immediate offset with update
    273   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
    274                                  InstrStage<1, [SW_DIS1], 0>,
    275                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    276                                  InstrStage<1, [SW_LS]>],
    277                                 [3, 1, 1]>,
    278   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
    279                                  InstrStage<1, [SW_DIS1], 0>,
    280                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    281                                  InstrStage<1, [SW_LS]>],
    282                                 [3, 1, 1]>,
    283   //
    284   // Register offset with update
    285   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
    286                                  InstrStage<1, [SW_DIS1], 0>,
    287                                  InstrStage<1, [SW_ALU0], 1>,
    288                                  InstrStage<1, [SW_LS]>],
    289                                 [3, 1, 1, 1]>,
    290   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
    291                                  InstrStage<1, [SW_DIS1], 0>,
    292                                  InstrStage<1, [SW_ALU0], 1>,
    293                                  InstrStage<1, [SW_LS]>],
    294                                 [3, 1, 1, 1]>,
    295   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
    296                                  InstrStage<1, [SW_DIS1], 0>,
    297                                  InstrStage<1, [SW_DIS2], 0>,
    298                                  InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
    299                                  InstrStage<1, [SW_LS], 3>,
    300                                  InstrStage<1, [SW_LS], 0>,
    301                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    302                                 [3, 4, 1, 1]>,
    303   //
    304   // Scaled register offset with update
    305   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
    306                                  InstrStage<1, [SW_DIS1], 0>,
    307                                  InstrStage<1, [SW_DIS2], 0>,
    308                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    309                                  InstrStage<1, [SW_LS], 3>,
    310                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    311                                 [5, 3, 1, 1]>,
    312   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
    313                                   InstrStage<1, [SW_DIS1], 0>,
    314                                   InstrStage<1, [SW_DIS2], 0>,
    315                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    316                                   InstrStage<1, [SW_LS], 0>,
    317                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
    318                                 [5, 3, 1, 1]>,
    319   //
    320   // Load multiple, def is the 5th operand.
    321   // FIXME: This assumes 3 to 4 registers.
    322   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
    323                                 InstrStage<1, [SW_DIS1], 0>,
    324                                 InstrStage<1, [SW_DIS2], 0>,
    325                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    326                                 InstrStage<1, [SW_LS]>],
    327                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
    328 
    329   //
    330   // Load multiple + update, defs are the 1st and 5th operands.
    331   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
    332                                 InstrStage<1, [SW_DIS1], 0>,
    333                                 InstrStage<1, [SW_DIS2], 0>,
    334                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
    335                                 InstrStage<1, [SW_LS], 3>,
    336                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    337                                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
    338   //
    339   // Load multiple plus branch
    340   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
    341                                 InstrStage<1, [SW_DIS1], 0>,
    342                                 InstrStage<1, [SW_DIS2], 0>,
    343                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    344                                 InstrStage<1, [SW_LS]>],
    345                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
    346   //
    347   // Pop, def is the 3rd operand.
    348   InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
    349                                 InstrStage<1, [SW_DIS1], 0>,
    350                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    351                                 InstrStage<1, [SW_LS]>],
    352                                [1, 1, 3], [], -1>, // dynamic uops
    353   //
    354   // Pop + branch, def is the 3rd operand.
    355   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
    356                                 InstrStage<1, [SW_DIS1], 0>,
    357                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    358                                 InstrStage<1, [SW_LS]>],
    359                                [1, 1, 3], [], -1>, // dynamic uops
    360 
    361   //
    362   // iLoadi + iALUr for t2LDRpci_pic.
    363   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    364                                 InstrStage<1, [SW_LS], 3>,
    365                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    366                                [4, 1]>,
    367 
    368   // Integer store pipeline
     369   //
    370   // Immediate offset
    371   InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    372                                  InstrStage<1, [SW_LS]>],
    373                                 [1, 1]>,
    374   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    375                                  InstrStage<1, [SW_LS]>],
    376                                 [1, 1]>,
    377   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
    378                                  InstrStage<1, [SW_DIS1], 0>,
    379                                  InstrStage<1, [SW_DIS2], 0>,
    380                                  InstrStage<1, [SW_LS], 0>,
    381                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    382                                  InstrStage<1, [SW_LS]>],
    383                                 [1, 1]>,
    384   //
    385   // Register offset
    386   InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    387                                  InstrStage<1, [SW_LS]>],
    388                                 [1, 1, 1]>,
    389   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    390                                  InstrStage<1, [SW_LS]>],
    391                                 [1, 1, 1]>,
    392   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
    393                                  InstrStage<1, [SW_DIS1], 0>,
    394                                  InstrStage<1, [SW_DIS2], 0>,
    395                                  InstrStage<1, [SW_LS], 0>,
    396                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    397                                  InstrStage<1, [SW_LS]>],
    398                                 [1, 1, 1]>,
    399   //
    400   // Scaled register offset
    401   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
    402                                   InstrStage<1, [SW_DIS1], 0>,
    403                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    404                                   InstrStage<1, [SW_LS]>],
    405                                  [1, 1, 1]>,
    406   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
    407                                   InstrStage<1, [SW_DIS1], 0>,
    408                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    409                                   InstrStage<1, [SW_LS]>],
    410                                  [1, 1, 1]>,
    411   //
    412   // Immediate offset with update
    413   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
    414                                   InstrStage<1, [SW_DIS1], 0>,
    415                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    416                                   InstrStage<1, [SW_LS]>],
    417                                  [1, 1, 1]>,
    418   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
    419                                   InstrStage<1, [SW_DIS1], 0>,
    420                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    421                                   InstrStage<1, [SW_LS]>],
    422                                  [1, 1, 1]>,
    423   //
    424   // Register offset with update
    425   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
    426                                   InstrStage<1, [SW_DIS1], 0>,
    427                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    428                                   InstrStage<1, [SW_LS]>],
    429                                  [1, 1, 1, 1]>,
    430   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
    431                                   InstrStage<1, [SW_DIS1], 0>,
    432                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    433                                   InstrStage<1, [SW_LS]>],
    434                                  [1, 1, 1, 1]>,
    435   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
    436                                   InstrStage<1, [SW_DIS1], 0>,
    437                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    438                                   InstrStage<1, [SW_LS]>],
    439                                  [1, 1, 1, 1]>,
    440   //
    441   // Scaled register offset with update
    442   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
    443                                     InstrStage<1, [SW_DIS1], 0>,
    444                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    445                                     InstrStage<1, [SW_LS], 0>,
    446                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
    447                                    [3, 1, 1, 1]>,
    448   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
    449                                     InstrStage<1, [SW_DIS1], 0>,
    450                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    451                                     InstrStage<1, [SW_LS], 0>,
    452                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
    453                                    [3, 1, 1, 1]>,
    454   //
    455   // Store multiple
    456   InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
    457                                 InstrStage<1, [SW_DIS1], 0>,
    458                                 InstrStage<1, [SW_DIS2], 0>,
    459                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    460                                 InstrStage<1, [SW_LS], 1>,
    461                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    462                                 InstrStage<1, [SW_LS], 1>,
    463                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    464                                 InstrStage<1, [SW_LS]>],
    465                                 [], [], -1>, // dynamic uops
    466   //
    467   // Store multiple + update
    468   InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
    469                                 InstrStage<1, [SW_DIS1], 0>,
    470                                 InstrStage<1, [SW_DIS2], 0>,
    471                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    472                                 InstrStage<1, [SW_LS], 1>,
    473                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    474                                 InstrStage<1, [SW_LS], 1>,
    475                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    476                                 InstrStage<1, [SW_LS]>],
    477                                [2], [], -1>, // dynamic uops
    478 
    479   //
    480   // Preload
    481   InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
    482 
    483   // Branch
    484   //
     485   // No delay slots, so the latency of a branch is unimportant.
    486   InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
    487 
    488   // FP Special Register to Integer Register File Move
    489   InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    490                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
    491                              [1]>,
    492   //
    493   // Single-precision FP Unary
    494   //
    495   // Most floating-point moves get issued on ALU0.
    496   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    497                                InstrStage<1, [SW_ALU0]>],
    498                               [2, 1]>,
    499   //
    500   // Double-precision FP Unary
    501   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    502                                InstrStage<1, [SW_ALU0]>],
    503                               [2, 1]>,
    504 
    505   //
    506   // Single-precision FP Compare
    507   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    508                                InstrStage<1, [SW_ALU0]>],
    509                               [1, 1]>,
    510   //
    511   // Double-precision FP Compare
    512   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    513                                InstrStage<1, [SW_ALU0]>],
    514                               [1, 1]>,
    515   //
    516   // Single to Double FP Convert
    517   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    518                                InstrStage<1, [SW_ALU1]>],
    519                               [4, 1]>,
    520   //
    521   // Double to Single FP Convert
    522   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    523                                InstrStage<1, [SW_ALU1]>],
    524                               [4, 1]>,
    525 
    526   //
    527   // Single to Half FP Convert
    528   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
    529                                InstrStage<1, [SW_DIS1], 0>,
    530                                InstrStage<1, [SW_ALU1], 4>,
    531                                InstrStage<1, [SW_ALU1]>],
    532                               [6, 1]>,
    533   //
    534   // Half to Single FP Convert
    535   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    536                                InstrStage<1, [SW_ALU1]>],
    537                               [4, 1]>,
    538 
    539   //
    540   // Single-Precision FP to Integer Convert
    541   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    542                                InstrStage<1, [SW_ALU1]>],
    543                               [4, 1]>,
    544   //
    545   // Double-Precision FP to Integer Convert
    546   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    547                                InstrStage<1, [SW_ALU1]>],
    548                               [4, 1]>,
    549   //
    550   // Integer to Single-Precision FP Convert
    551   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    552                                InstrStage<1, [SW_ALU1]>],
    553                               [4, 1]>,
    554   //
    555   // Integer to Double-Precision FP Convert
    556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    557                                InstrStage<1, [SW_ALU1]>],
    558                               [4, 1]>,
    559   //
    560   // Single-precision FP ALU
    561   InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    562                                InstrStage<1, [SW_ALU0]>],
    563                               [2, 1, 1]>,
    564   //
    565   // Double-precision FP ALU
    566   InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    567                                InstrStage<1, [SW_ALU0]>],
    568                               [2, 1, 1]>,
    569   //
    570   // Single-precision FP Multiply
    571   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    572                                InstrStage<1, [SW_ALU1]>],
    573                               [4, 1, 1]>,
    574   //
    575   // Double-precision FP Multiply
    576   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    577                                InstrStage<1, [SW_ALU1]>],
    578                               [6, 1, 1]>,
    579   //
    580   // Single-precision FP MAC
    581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    582                                InstrStage<1, [SW_ALU1]>],
    583                               [8, 1, 1]>,
    584   //
    585   // Double-precision FP MAC
    586   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    587                                InstrStage<1, [SW_ALU1]>],
    588                               [12, 1, 1]>,
    589   //
    590   // Single-precision Fused FP MAC
    591   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    592                                InstrStage<1, [SW_ALU1]>],
    593                               [8, 1, 1]>,
    594   //
    595   // Double-precision Fused FP MAC
    596   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    597                                InstrStage<1, [SW_ALU1]>],
    598                               [12, 1, 1]>,
    599   //
    600   // Single-precision FP DIV
    601   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    602                                InstrStage<1, [SW_ALU1], 0>,
    603                                InstrStage<15, [SW_FDIV]>],
    604                               [17, 1, 1]>,
    605   //
    606   // Double-precision FP DIV
    607   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    608                                InstrStage<1, [SW_ALU1], 0>,
    609                                InstrStage<30, [SW_FDIV]>],
    610                               [32, 1, 1]>,
    611   //
    612   // Single-precision FP SQRT
    613   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    614                                InstrStage<1, [SW_ALU1], 0>,
    615                                InstrStage<15, [SW_FDIV]>],
    616                               [17, 1]>,
    617   //
    618   // Double-precision FP SQRT
    619   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    620                                InstrStage<1, [SW_ALU1], 0>,
    621                                InstrStage<30, [SW_FDIV]>],
    622                               [32, 1, 1]>,
    623 
    624   //
    625   // Integer to Single-precision Move
    626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
    627                                InstrStage<1, [SW_DIS1], 0>,
    628                                InstrStage<1, [SW_LS], 4>,
    629                                InstrStage<1, [SW_ALU0]>],
    630                               [6, 1]>,
    631   //
    632   // Integer to Double-precision Move
    633   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    634                                InstrStage<1, [SW_LS]>],
    635                               [4, 1]>,
    636   //
    637   // Single-precision to Integer Move
    638   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    639                                InstrStage<1, [SW_LS]>],
    640                               [3, 1]>,
    641   //
    642   // Double-precision to Integer Move
    643   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
    644                                InstrStage<1, [SW_DIS1], 0>,
    645                                InstrStage<1, [SW_LS], 3>,
    646                                InstrStage<1, [SW_LS]>],
    647                               [3, 4, 1]>,
    648   //
    649   // Single-precision FP Load
    650   InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    651                                InstrStage<1, [SW_LS]>],
    652                               [4, 1]>,
    653   //
    654   // Double-precision FP Load
    655   InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    656                                InstrStage<1, [SW_LS]>],
    657                               [4, 1]>,
    658   //
    659   // FP Load Multiple
    660   // FIXME: Assumes a single Q register.
    661   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    662                                InstrStage<1, [SW_LS]>],
    663                               [1, 1, 1, 4], [], -1>, // dynamic uops
    664   //
    665   // FP Load Multiple + update
    666   // FIXME: Assumes a single Q register.
    667   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
    668                                InstrStage<1, [SW_DIS1], 0>,
    669                                InstrStage<1, [SW_LS], 4>,
    670                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    671                               [2, 1, 1, 1, 4], [], -1>, // dynamic uops
    672   //
    673   // Single-precision FP Store
    674   InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    675                                InstrStage<1, [SW_LS]>],
    676                               [1, 1]>,
    677   //
    678   // Double-precision FP Store
    679   InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    680                                InstrStage<1, [SW_LS]>],
    681                               [1, 1]>,
    682   //
    683   // FP Store Multiple
    684   // FIXME: Assumes a single Q register.
    685   InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    686                                InstrStage<1, [SW_LS]>],
    687                               [1, 1, 1], [], -1>, // dynamic uops
    688   //
    689   // FP Store Multiple + update
    690   // FIXME: Assumes a single Q register.
    691   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
    692                                 InstrStage<1, [SW_DIS1], 0>,
    693                                 InstrStage<1, [SW_LS], 4>,
    694                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    695                                [2, 1, 1, 1], [], -1>, // dynamic uops
    696   // NEON
    697   //
    698   // Double-register Integer Unary
    699   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    700                                InstrStage<1, [SW_ALU0]>],
    701                               [4, 1]>,
    702   //
    703   // Quad-register Integer Unary
    704   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    705                                InstrStage<1, [SW_ALU0]>],
    706                               [4, 1]>,
    707   //
    708   // Double-register Integer Q-Unary
    709   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    710                                InstrStage<1, [SW_ALU0]>],
    711                               [4, 1]>,
    712   //
    713   // Quad-register Integer Q-Unary
    714   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    715                                InstrStage<1, [SW_ALU0]>],
    716                               [4, 1]>,
    717   //
    718   // Double-register Integer Binary
    719   InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    720                                InstrStage<1, [SW_ALU0]>],
    721                               [2, 1, 1]>,
    722   //
    723   // Quad-register Integer Binary
    724   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    725                                InstrStage<1, [SW_ALU0]>],
    726                               [2, 1, 1]>,
    727   //
    728   // Double-register Integer Subtract
    729   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    730                                InstrStage<1, [SW_ALU0]>],
    731                               [2, 1, 1]>,
    732   //
    733   // Quad-register Integer Subtract
    734   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    735                                InstrStage<1, [SW_ALU0]>],
    736                               [2, 1, 1]>,
    737   //
    738   // Double-register Integer Shift
    739   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    740                                InstrStage<1, [SW_ALU0]>],
    741                               [2, 1, 1]>,
    742   //
    743   // Quad-register Integer Shift
    744   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    745                                InstrStage<1, [SW_ALU0]>],
    746                               [2, 1, 1]>,
    747   //
    748   // Double-register Integer Shift (4 cycle)
    749   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    750                                InstrStage<1, [SW_ALU0]>],
    751                               [4, 1, 1]>,
    752   //
    753   // Quad-register Integer Shift (4 cycle)
    754   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    755                                InstrStage<1, [SW_ALU0]>],
    756                               [4, 1, 1]>,
    757   //
    758   // Double-register Integer Binary (4 cycle)
    759   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    760                                InstrStage<1, [SW_ALU0]>],
    761                               [4, 1, 1]>,
    762   //
    763   // Quad-register Integer Binary (4 cycle)
    764   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    765                                InstrStage<1, [SW_ALU0]>],
    766                               [4, 1, 1]>,
    767   //
    768   // Double-register Integer Subtract (4 cycle)
    769   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    770                                InstrStage<1, [SW_ALU0]>],
    771                               [4, 1, 1]>,
    772   //
    773   // Quad-register Integer Subtract (4 cycle)
    774   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    775                                InstrStage<1, [SW_ALU0]>],
    776                               [4, 1, 1]>,
    777 
    778   //
    779   // Double-register Integer Count
    780   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    781                                InstrStage<1, [SW_ALU0]>],
    782                               [2, 1, 1]>,
    783   //
    784   // Quad-register Integer Count
    785   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    786                                InstrStage<1, [SW_ALU0]>],
    787                               [2, 1, 1]>,
    788   //
    789   // Double-register Absolute Difference and Accumulate
    790   InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    791                                InstrStage<1, [SW_ALU0]>],
    792                               [4, 1, 1, 1]>,
    793   //
    794   // Quad-register Absolute Difference and Accumulate
    795   InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    796                                InstrStage<1, [SW_ALU0]>],
    797                               [4, 1, 1, 1]>,
    798   //
    799   // Double-register Integer Pair Add Long
    800   InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    801                                InstrStage<1, [SW_ALU0]>],
    802                               [4, 1, 1]>,
    803   //
    804   // Quad-register Integer Pair Add Long
    805   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    806                                InstrStage<1, [SW_ALU0]>],
    807                               [4, 1, 1]>,
    808 
    809   //
    810   // Double-register Integer Multiply (.8, .16)
    811   InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    812                                InstrStage<1, [SW_ALU1]>],
    813                               [4, 1, 1]>,
    814   //
    815   // Quad-register Integer Multiply (.8, .16)
    816   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    817                                InstrStage<1, [SW_ALU1]>],
    818                               [4, 1, 1]>,
    819 
    820   //
    821   // Double-register Integer Multiply (.32)
    822   InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    823                                InstrStage<1, [SW_ALU1]>],
    824                               [4, 1, 1]>,
    825   //
    826   // Quad-register Integer Multiply (.32)
    827   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    828                                InstrStage<1, [SW_ALU1]>],
    829                               [4, 1, 1]>,
    830   //
    831   // Double-register Integer Multiply-Accumulate (.8, .16)
    832   InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    833                                InstrStage<1, [SW_ALU1]>],
    834                               [4, 1, 1, 1]>,
    835   //
    836   // Double-register Integer Multiply-Accumulate (.32)
    837   InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    838                                InstrStage<1, [SW_ALU1]>],
    839                               [4, 1, 1, 1]>,
    840   //
    841   // Quad-register Integer Multiply-Accumulate (.8, .16)
    842   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    843                                InstrStage<1, [SW_ALU1]>],
    844                               [4, 1, 1, 1]>,
    845   //
    846   // Quad-register Integer Multiply-Accumulate (.32)
    847   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    848                                InstrStage<1, [SW_ALU1]>],
    849                               [4, 1, 1, 1]>,
    850 
    851   //
    852   // Move
    853   InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    854                                InstrStage<1, [SW_ALU0]>],
    855                               [2, 1]>,
    856   //
    857   // Move Immediate
    858   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    859                                InstrStage<1, [SW_ALU0]>],
    860                               [2]>,
    861   //
    862   // Double-register Permute Move
    863   InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    864                                InstrStage<1, [SW_ALU1]>],
    865                               [2, 1]>,
    866   //
    867   // Quad-register Permute Move
    868   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    869                                InstrStage<1, [SW_ALU1]>],
    870                               [2, 1]>,
    871   //
    872   // Integer to Single-precision Move
    873   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
    874                                InstrStage<1, [SW_DIS1], 0>,
    875                                InstrStage<1, [SW_LS], 4>,
    876                                InstrStage<1, [SW_ALU0]>],
    877                               [6, 1]>,
    878   //
    879   // Integer to Double-precision Move
    880   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    881                                InstrStage<1, [SW_LS]>],
    882                               [4, 1, 1]>,
    883   //
    884   // Single-precision to Integer Move
    885   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    886                                InstrStage<1, [SW_LS]>],
    887                               [3, 1]>,
    888   //
    889   // Double-precision to Integer Move
    890   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
    891                                InstrStage<1, [SW_DIS1], 0>,
    892                                InstrStage<1, [SW_LS], 3>,
    893                                InstrStage<1, [SW_LS]>],
    894                               [3, 4, 1]>,
    895   //
    896   // Integer to Lane Move
    897   // FIXME: I think this is correct, but it is not clear from the tuning guide.
    898   InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
    899                                InstrStage<1, [SW_DIS1], 0>,
    900                                InstrStage<1, [SW_LS], 4>,
    901                                InstrStage<1, [SW_ALU0]>],
    902                               [6, 1]>,
    903 
    904   //
    905   // Vector narrow move
    906   InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    907                                InstrStage<1, [SW_ALU1]>],
    908                               [2, 1]>,
    909   //
    910   // Double-register FP Unary
    911   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
    912   //        and they issue on a different pipeline.
    913   InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    914                                InstrStage<1, [SW_ALU0]>],
    915                               [2, 1]>,
    916   //
    917   // Quad-register FP Unary
    918   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
    919   //        and they issue on a different pipeline.
    920   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    921                                InstrStage<1, [SW_ALU0]>],
    922                               [2, 1]>,
    923   //
    924   // Double-register FP Binary
    925   // FIXME: We're using this itin for many instructions.
    926   InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    927                                InstrStage<1, [SW_ALU0]>],
    928                               [4, 1, 1]>,
    929 
    930   //
    931   // VPADD, etc.
    932   InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    933                                InstrStage<1, [SW_ALU0]>],
    934                               [4, 1, 1]>,
    935   //
    936   // Double-register FP VMUL
    937   InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    938                                InstrStage<1, [SW_ALU1]>],
    939                               [4, 1, 1]>,
    940   //
    941   // Quad-register FP Binary
    942   InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    943                                InstrStage<1, [SW_ALU0]>],
    944                               [4, 1, 1]>,
    945   //
    946   // Quad-register FP VMUL
    947   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    948                                InstrStage<1, [SW_ALU1]>],
    949                               [4, 1, 1]>,
    950   //
    951   // Double-register FP Multiply-Accumulate
    952   InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    953                                InstrStage<1, [SW_ALU1]>],
    954                               [8, 1, 1]>,
    955   //
    956   // Quad-register FP Multiply-Accumulate
    957   InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    958                                InstrStage<1, [SW_ALU1]>],
    959                               [8, 1, 1]>,
    960   //
    961   // Double-register Fused FP Multiply-Accumulate
    962   InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    963                                InstrStage<1, [SW_ALU1]>],
    964                               [8, 1, 1]>,
    965   //
    966   // Quad-register Fused FP Multiply-Accumulate
    967   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    968                                InstrStage<1, [SW_ALU1]>],
    969                               [8, 1, 1]>,
    970   //
    971   // Double-register Reciprocal Step
    972   InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    973                                InstrStage<1, [SW_ALU1]>],
    974                               [8, 1, 1]>,
    975   //
    976   // Quad-register Reciprocal Step
    977   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    978                                InstrStage<1, [SW_ALU1]>],
    979                               [8, 1, 1]>,
    980   //
    981   // Double-register Permute
    982   // FIXME: The latencies are unclear from the documentation.
    983   InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
    984                                InstrStage<1, [SW_DIS1], 0>,
    985                                InstrStage<1, [SW_DIS2], 0>,
    986                                InstrStage<1, [SW_ALU1], 2>,
    987                                InstrStage<1, [SW_ALU1], 2>,
    988                                InstrStage<1, [SW_ALU1]>],
    989                               [3, 4, 3, 4]>,
    990   //
    991   // Quad-register Permute
    992   // FIXME: The latencies are unclear from the documentation.
    993   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
    994                                InstrStage<1, [SW_DIS1], 0>,
    995                                InstrStage<1, [SW_DIS2], 0>,
    996                                InstrStage<1, [SW_ALU1], 2>,
    997                                InstrStage<1, [SW_ALU1], 2>,
    998                                InstrStage<1, [SW_ALU1]>],
    999                               [3, 4, 3, 4]>,
   1000   //
   1001   // Quad-register Permute (3 cycle issue on A9)
   1002   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
   1003                                InstrStage<1, [SW_DIS1], 0>,
   1004                                InstrStage<1, [SW_DIS2], 0>,
   1005                                InstrStage<1, [SW_ALU1], 2>,
   1006                                InstrStage<1, [SW_ALU1], 2>,
   1007                                InstrStage<1, [SW_ALU1]>],
   1008                               [3, 4, 3, 4]>,
   1009 
   1010   //
   1011   // Double-register VEXT
   1012   InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1013                                InstrStage<1, [SW_ALU1]>],
   1014                               [2, 1, 1]>,
   1015   //
   1016   // Quad-register VEXT
   1017   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1018                                InstrStage<1, [SW_ALU1]>],
   1019                               [2, 1, 1]>,
   1020   //
   1021   // VTB
   1022   InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1023                                InstrStage<1, [SW_ALU1]>],
   1024                               [2, 1, 1]>,
   1025   InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
   1026                                InstrStage<1, [SW_DIS1], 0>,
   1027                                InstrStage<1, [SW_ALU1], 2>,
   1028                                InstrStage<1, [SW_ALU1]>],
   1029                               [4, 1, 3, 3]>,
   1030   InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
   1031                                InstrStage<1, [SW_DIS1], 0>,
   1032                                InstrStage<1, [SW_DIS2], 0>,
   1033                                InstrStage<1, [SW_ALU1], 2>,
   1034                                InstrStage<1, [SW_ALU1], 2>,
   1035                                InstrStage<1, [SW_ALU1]>],
   1036                               [6, 1, 3, 5, 5]>,
   1037   InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
   1038                                InstrStage<1, [SW_DIS1], 0>,
   1039                                InstrStage<1, [SW_DIS2], 0>,
   1040                                InstrStage<1, [SW_ALU1], 2>,
   1041                                InstrStage<1, [SW_ALU1], 2>,
   1042                                InstrStage<1, [SW_ALU1], 2>,
   1043                                InstrStage<1, [SW_ALU1]>],
   1044                               [8, 1, 3, 5, 7, 7]>,
   1045   //
   1046   // VTBX
   1047   InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1048                                InstrStage<1, [SW_ALU1]>],
   1049                               [2, 1, 1]>,
   1050   InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
   1051                                InstrStage<1, [SW_DIS1], 0>,
   1052                                InstrStage<1, [SW_ALU1], 2>,
   1053                                InstrStage<1, [SW_ALU1]>],
   1054                               [4, 1, 3, 3]>,
   1055   InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
   1056                                InstrStage<1, [SW_DIS1], 0>,
   1057                                InstrStage<1, [SW_DIS2], 0>,
   1058                                InstrStage<1, [SW_ALU1], 2>,
   1059                                InstrStage<1, [SW_ALU1], 2>,
   1060                                InstrStage<1, [SW_ALU1]>],
   1061                               [6, 1, 3, 5, 5]>,
   1062   InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
   1063                                InstrStage<1, [SW_DIS1], 0>,
   1064                                InstrStage<1, [SW_DIS2], 0>,
   1065                                InstrStage<1, [SW_ALU1], 2>,
   1066                                InstrStage<1, [SW_ALU1], 2>,
   1067                                InstrStage<1, [SW_ALU1], 2>,
   1068                                InstrStage<1, [SW_ALU1]>],
   1069                               [8, 1, 3, 5, 7, 7]>
   1070 ]>;
   1071 
   1072 // ===---------------------------------------------------------------------===//
   1073 // The following definitions describe the simple machine model which
   1074 // will replace itineraries.
   1075 
   1076 // Swift machine model for scheduling and other instruction cost heuristics.
   1077 def SwiftModel : SchedMachineModel {
   1078   let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
   1079   let MicroOpBufferSize = 45; // Based on NEON renamed registers.
   1080   let LoadLatency = 3; // Same value as SwiftWriteP2ThreeCycle (plain load).
   1081   let MispredictPenalty = 14; // A branch direction mispredict.
   1082 
   1083   let Itineraries = SwiftItineraries; // Legacy itinerary table defined above.
   1084 }
   1085 
   1086 // Swift predicates. Wraps the TII->isSwiftFastImmShift(MI) query.
   1087 def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
   1088 
   1089 // Swift resource mapping.
   1090 let SchedModel = SwiftModel in {
   1091   // Processor resources. P0 and P1 both name SwiftUnitP01 as their Super.
   1092   def SwiftUnitP01 : ProcResource<2>; // ALU unit.
   1093   def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
   1094   def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
   1095   def SwiftUnitP2 : ProcResource<1>; // LS unit.
   1096   def SwiftUnitDiv : ProcResource<1>; // NOTE(review): presumably the divider.
   1097 
   1098   // Generic resource requirements; names spell out unit(s) and latency.
   1099   def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>; // Latency not set.
   1100   def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
   1101   def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
   1102   def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
   1103   def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
   1104     let Latency = 4; // Lists both P0 and P1 as resources.
   1105   }
   1106   def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
   1107     let Latency = 6; // Lists both P0 and P1 as resources.
   1108   }
   1109   def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>; // Latency not set.
   1110   def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
   1111   def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
   1112   def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
   1113   def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
   1114   def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
   1115   def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
   1116   def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
   1117   def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
   1118   def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
   1119                                                       SwiftUnitP01]> {
   1120     let Latency = 3;
   1121     let NumMicroOps = 2; // Matches the two SwiftUnitP01 entries listed above.
   1122   }
   1123   def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
   1124     let Latency = 3;
   1125     let NumMicroOps = 3;
   1126     let ResourceCycles = [3]; // One entry, for the single SwiftUnitP0 resource.
   1127   }
   1128   // Plain load without writeback.
   1129   def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
   1130     let Latency = 3;
   1131   }
   1132   def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
   1133     let Latency = 4;
   1134   }
   1135   // A store does not write to a register.
   1136   def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
   1137     let Latency = 0; // Zero latency: see the store note above.
   1138   }
   1139   foreach Num = 1-4 in { // Defines SwiftWrite1xP2 through SwiftWrite4xP2.
   1140     def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
   1141   }
   1142   def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
   1143                                                     SwiftWriteP01OneCycle,
   1144                                                     SwiftWriteP2ThreeCycle]>;
   1145   // 4.2.4 Arithmetic and Logical.
   1146   // ALU operation register shifted by immediate variant.
   1147   def SwiftWriteALUsi : SchedWriteVariant<[
   1148     // lsl #2, lsl #1, or lsr #1.
   1149     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
   1150     SchedVar<NoSchedPred,             [WriteALU]> // Fallback variant.
   1151   ]>;
   1152   def SwiftWriteALUsr : SchedWriteVariant<[
   1153     SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
   1154     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]> // Fallback variant.
   1155   ]>;
   1156   def SwiftWriteALUSsr : SchedWriteVariant<[
   1157     SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
   1158     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]> // Fallback variant.
   1159   ]>;
   1160   def SwiftReadAdvanceALUsr : SchedReadVariant<[
   1161     SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>, // Advance of 2.
   1162     SchedVar<NoSchedPred,      [NoReadAdvance]> // Fallback: no advance.
   1163   ]>;
   1164   // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
   1165   // AND,BIC,EOR,ORN,ORR
   1166   // CLZ,RBIT,REV,REV16,REVSH,PKH
   1167   def : WriteRes<WriteALU, [SwiftUnitP01]>; // WriteALU maps to SwiftUnitP01.
   1168   def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
   1169   def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
   1170   def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
   1171   def : ReadAdvance<ReadALU, 0>; // Zero-cycle advance for ReadALU.
   1172   def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
   1173 
   1174 
   1175   def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
   1176     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>, // Predicate holds.
   1177     SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>  // Fallback: 2 cycles.
   1178   ]>;
   1179 
   1180   // 4.2.5 Integer comparison (WriteCMPsi picks its write via the variant above)
   1181   def : WriteRes<WriteCMP, [SwiftUnitP01]>;
   1182   def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
   1183   def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
   1184 
   1185   // 4.2.6 Shift, Move
   1186   // Shift
   1187   //  ASR,LSL,ROR,RRX
   1188   //  MOV(register-shiftedregister)  MVN(register-shiftedregister)
   1189   // Move
   1190   //  MOV,MVN
   1191   //  MOVT
   1192   // Sign/Zero extension (plain and t2-prefixed opcodes share one write)
   1193   def : InstRW<[SwiftWriteP01OneCycle],
   1194                (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
   1195                           "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
   1196                           "t2UXTB16")>;
   1197   // Pseudo instructions. '$' stops MOV_ga_pcrel from also matching *_pcrel_ldr.
   1198   def : InstRW<[SwiftWriteP01OneCycle2x],
   1199         (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
   1200                    "t2MOVi32imm", "t2MOV_ga_dyn")>;
   1201   def : InstRW<[SwiftWriteP01OneCycle3x],
   1202         (instregex "MOV_ga_pcrel$", "t2MOV_ga_pcrel$", "t2MOVi16_ga_pcrel")>;
   1203   def : InstRW<[SwiftWriteP01OneCycle2x_load],
   1204         (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
   1205 
   1206   def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
   1207 
   1208   def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
   1209     SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
   1210     SchedVar<NoSchedPred,     [ SwiftWriteP0OneCycle ]>
   1211   ]>;
   1212 
   1213   // 4.2.7 Select
   1214   // SEL: a single P0 write, or a sequence of two of them when predicated.
   1215   def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
   1216 
   1217   // 4.2.8 Bitfield
   1218   // BFI,BFC, SBFX,UBFX (two-cycle SwiftUnitP01 write)
   1219   def : InstRW< [SwiftWriteP01TwoCycle],
   1220         (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
   1221         "(t|t2)UBFX", "(t|t2)SBFX")>;
   1222 
   1223   // 4.2.9 Saturating arithmetic (two-cycle SwiftUnitP01 write)
   1224   def : InstRW< [SwiftWriteP01TwoCycle],
   1225         (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
   1226         "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
   1227         "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
   1228         "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
   1229         "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
   1230         "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
   1231 
   1232   // 4.2.10 Parallel Arithmetic
   1233   // Not flag setting. NOTE(review): these look like the GE-setting ops; confirm.
   1234   def : InstRW< [SwiftWriteALUsr],
   1235         (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
   1236         "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
   1237         "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
   1238         "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
   1239   // Flag setting. NOTE(review): halving/extend-add ops; labels may be swapped.
   1240   def : InstRW< [SwiftWriteP01TwoCycle],
   1241        (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
   1242        "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
   1243        "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
   1244        "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
   1245        "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
   1246        "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
   1247 
   1248   // 4.2.11 Sum of Absolute Difference; USADA8's third read is advanced by 2.
   1249   def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
   1250   def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
   1251         (instregex "USADA8")>;
   1252 
   1253   // 4.2.12 Integer Multiply (32-bit result)
   1254   // Two sources. No read operands are listed for these opcodes.
   1255   def : InstRW< [SwiftWriteP0FourCycle],
   1256         (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
   1257         "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
   1258         "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
   1259         "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
   1260 
   1261   def SwiftWriteP0P01FiveCycleTwoUops : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
   1262     let Latency = 5;
   1263     let NumMicroOps = 2; // Two micro-ops, as the name says; the default is 1.
   1264   }
   1265 
   1266   def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
   1267     SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
   1268     SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]>
   1269   ]>;
   1270 
   1271   def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
   1272      SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>, // Advanced by 4 cycles.
   1273      SchedVar<NoSchedPred,      [ReadALU]>
   1274   ]>;
   1275 
   1276   // Multiply accumulate, three sources (third read uses the variant above)
   1277   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
   1278                  SwiftReadAdvanceFourCyclesPred],
   1279         (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
   1280         "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
   1281         "t2SMMLSR")>;
   1282 
   1283   // 4.2.13 Integer Multiply (32-bit result, Q flag)
   1284   def : InstRW< [SwiftWriteP0FourCycle],
   1285         (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
   1286   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
   1287                  SwiftReadAdvanceFourCyclesPred],
   1288         (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
   1289         "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
   1290         "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
   1291   def : InstRW< [SwiftPredP0P01FourFiveCycle], // NOTE(review): no read list here.
   1292         (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
   1293 
   1294   def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
   1295     let Latency = 5;
   1296     let NumMicroOps = 3;
   1297     let ResourceCycles = [2, 1]; // SwiftUnitP0 for 2 cycles, SwiftUnitP01 for 1.
   1298   }
   1299   def SwiftWrite1Cycle : SchedWriteRes<[]> { // Latency only: no unit, no uop.
   1300     let Latency = 1;
   1301     let NumMicroOps = 0;
   1302   }
   1303   def SwiftWrite5Cycle : SchedWriteRes<[]> { // Latency only: no unit, no uop.
   1304     let Latency = 5;
   1305     let NumMicroOps = 0;
   1306   }
   1307   def SwiftWrite6Cycle : SchedWriteRes<[]> { // Latency only: no unit, no uop.
   1308     let Latency = 6;
   1309     let NumMicroOps = 0;
   1310   }
   1311 
   1312   // 4.2.14 Integer Multiply, Long (second result is a latency-only write)
   1313   def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
   1314         (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
   1315 
   1316   def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
   1317     let Latency = 7; // NOTE(review): name says FiveCycle; confirm 7 is intended.
   1318     let NumMicroOps = 5;
   1319     let ResourceCycles = [2, 3]; // SwiftUnitP0 for 2 cycles, SwiftUnitP01 for 3.
   1320   }
   1321 
   1322   // 4.2.15 Integer Multiply Accumulate, Long
   1323   // 4.2.16 Integer Multiply Accumulate, Dual
   1324   // 4.2.17 Integer Multiply Accumulate Accumulate, Long
   1325   // We are being a bit inaccurate here. Reads 3 and 4 are advanced by 4 and 3.
   1326   def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
   1327                  SchedReadAdvance<4>, SchedReadAdvance<3>],
   1328         (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
   1329         "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
   1330         "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB", "t2SMLALBT",
   1331         "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
   1332         "t2UMAAL")>;
   1333 
   1334   def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
   1335     let NumMicroOps = 1;
   1336     let Latency = 14;
   1337     let ResourceCycles = [1, 14]; // SwiftUnitP0 for 1 cycle, SwiftUnitDiv for 14.
   1338   }
   1339   // 4.2.18 Integer Divide (the opcodes themselves are mapped to SwiftDiv)
   1340   def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
   1341   def : InstRW <[SwiftDiv],
   1342         (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
   1343 
   1344   // 4.2.19 Integer Load Single Element
   1345   // 4.2.20 Integer Load Signextended
   1346   def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
   1347     let Latency = 3;
   1348   }
   1349   def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
   1350     let Latency = 4;
   1351   }
   1352   def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
   1353                                                    SwiftUnitP01]> {
   1354     let Latency = 4;
   1355   }
   1356   def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
   1357     let Latency = 3;
   1358   }
   1359   def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
   1360                                                     SwiftUnitP01]> {
   1361     let Latency = 3;
   1362   }
   1363   def SwiftWrBackOne : SchedWriteRes<[]> { // Latency only: no unit, no uop.
   1364     let Latency = 1;
   1365     let NumMicroOps = 0;
   1366   }
   1367   def SwiftWriteLdFour : SchedWriteRes<[]> { // Latency only: no unit, no uop.
   1368     let Latency = 4;
   1369     let NumMicroOps = 0;
   1370   }
   1371    // Not accurate. t2LDR is '$'-anchored so t2LDRpci_pic only matches below.
   1372   def : InstRW<[SwiftWriteP2ThreeCycle],
   1373         (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)$",
   1374         "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
   1375         "tLDR(r|i|spi|pci|pciASM)")>;
   1376   def : InstRW<[SwiftWriteP2ThreeCycle],
   1377         (instregex "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
   1378   def : InstRW<[SwiftWriteP2P01FourCyle],
   1379         (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
   1380         "t2LDRpci_pic", "tLDRS(B|H)")>;
   1381   def : InstRW<[SwiftWriteP2P01ThreeCycle,  SwiftWrBackOne],
   1382         (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
   1383         "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
   1384         "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
   1385   def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
   1386         (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
   1387         "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
   1388 
   1389   // 4.2.21 Integer Dual Load
   1390   // Not accurate. The second result is the latency-only SwiftWriteLdFour.
   1391   def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
   1392         (instregex "t2LDRDi8", "LDRD$")>;
   1393   def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
   1394         (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
   1395 
   1396   // 4.2.22 Integer Load, Multiple (NumReg = 1 .. 16)
   1397   foreach Lat = 3-25 in {
   1398     def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> { let Latency = Lat; }
   1399     def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { // Latency only, no unit.
   1400       let Latency = Lat;
   1401       let NumMicroOps = 0; // Like the other SchedWriteRes<[]> writes here.
   1402     }
   1403   }
   1404   // Predicate. SwiftLMAddr<N>Pred holds when getNumLDMAddresses(MI) == N.
   1405   foreach NumAddr = 1-16 in {
   1406     def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
   1407   }
   1408   def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
   1409   def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>; // Default latency.
   1410   def SwiftWriteLM : SchedWriteVariant<[ // N addresses: N writes, 3 .. N+2 cycles.
   1411     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
   1412     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1413                                 SwiftWriteLM5Cy]>,
   1414     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1415                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
   1416     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1417                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1418                                 SwiftWriteLM7Cy]>,
   1419     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1420                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1421                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
   1422     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1423                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1424                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1425                                 SwiftWriteLM9Cy]>,
   1426     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1427                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1428                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1429                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
   1430     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1431                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1432                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1433                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1434                                 SwiftWriteLM11Cy]>,
   1435     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1436                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1437                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1438                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1439                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
   1440     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1441                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1442                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1443                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1444                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1445                                 SwiftWriteLM13Cy]>,
   1446     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1447                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1448                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1449                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1450                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1451                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
   1452     SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1453                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1454                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1455                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1456                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1457                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1458                                 SwiftWriteLM15Cy]>,
   1459     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1460                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1461                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1462                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1463                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1464                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1465                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
   1466     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1467                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1468                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1469                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1470                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1471                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1472                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
   1473                                 SwiftWriteLM17Cy]>,
   1474     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1475                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1476                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1477                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1478                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1479                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1480                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
   1481                                 SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
   1482     // Unknown number of registers, just use resources for two registers.
   1483     SchedVar<NoSchedPred,      [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1484                                 SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
   1485                                 SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
   1486                                 SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
   1487                                 SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
   1488                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
   1489                                 SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
   1490                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
   1491 
   1492   ]> { let Variadic=1; } // The listed writes apply to the variadic operands.
   1493 
   1494   def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
   1495         (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
   1496         "(t|sys)LDM(IA|DA|DB|IB)$")>;
   1497   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
   1498         (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET": see next InstRW*/
   1499         "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
   1500   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
   1501         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
   1502   // 4.2.23 Integer Store, Single Element (indexed forms add a P01 write)
   1503   def : InstRW<[SwiftWriteP2],
   1504         (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
   1505         "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
   1506 
   1507   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
   1508         (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
   1509         "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
   1510         "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
   1511         "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
   1512 
   1513   // 4.2.24 Integer Store, Dual (two zero-latency P2 writes plus P01 writes)
   1514   def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
   1515         (instregex "STRD$", "t2STRDi8")>;
   1516   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
   1517                 SwiftWriteP01OneCycle],
   1518         (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
   1519 
   1520   // 4.2.25 Integer Store, Multiple
   1521   def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
   1522     let Latency = 0;
   1523   }
   1524   foreach NumAddr = 1-16 in { // SwiftWriteSTM<N> repeats SwiftWriteStIncAddr N times.
   1525      def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
   1526   }
   1527   def SwiftWriteSTM : SchedWriteVariant<[
   1528     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
   1529     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
   1530     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
   1531     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
   1532     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
   1533     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
   1534     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
   1535     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
   1536     SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
   1537     SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
   1538     SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
   1539     SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
   1540     SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
   1541     SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
   1542     SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
   1543     // Unknown number of registers, just use resources for two registers.
   1544     SchedVar<NoSchedPred,      [SwiftWriteSTM2]>
   1545   ]>;
   1546   def : InstRW<[SwiftWriteSTM],
   1547         (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
   1548   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
   1549         (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
   1550         "PUSH", "tPUSH")>;
   1551 
   1552   // 4.2.26 Branch (every variant occupies SwiftUnitP1)
   1553   def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
   1554   def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
   1555   def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
   1556 
   1557   // 4.2.27 Not issued: no unit, zero latency, zero micro-ops.
   1558   def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
   1559   def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
   1560 
   1561   // 4.2.28 Advanced SIMD, Integer, 2 cycle (VEXT/VREV* use the P1 write instead)
   1562   def : InstRW<[SwiftWriteP0TwoCycle],
   1563         (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
   1564                    "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
   1565                    "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
   1566                    "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
   1567                    "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
   1568 
   1569   def : InstRW<[SwiftWriteP1TwoCycle],
   1570         (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
   1571 
   1572   // 4.2.29 Advanced SIMD, Integer, 4 cycle
   1573   // 4.2.30 Advanced SIMD, Integer with Accumulate
   1574   def : InstRW<[SwiftWriteP0FourCycle],
   1575         (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
   1576         "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
   1577         "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
   1578         "VQSUB")>;
   1579   def : InstRW<[SwiftWriteP1FourCycle],
   1580         (instregex "VRECPE", "VRSQRTE")>;
   1581 
   1582   // 4.2.31 Advanced SIMD, Add and Shift with Narrow (four- or six-cycle write)
   1583   def : InstRW<[SwiftWriteP0P1FourCycle],
   1584         (instregex "VADDHN", "VSUBHN", "VSHRN")>;
   1585   def : InstRW<[SwiftWriteP0P1SixCycle],
   1586         (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
   1587                    "VQRSHRN", "VQRSHRUN")>;
   1588 
   1589   // 4.2.32 Advanced SIMD, Vector Table Lookup (VTB[LX]<N> gets N P1 writes)
   1590   foreach Num = 1-4 in {
   1591     def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
   1592   }
   1593   def : InstRW<[SwiftWrite1xP1TwoCycle],
   1594         (instregex "VTB(L|X)1")>;
   1595   def : InstRW<[SwiftWrite2xP1TwoCycle],
   1596         (instregex "VTB(L|X)2")>;
   1597   def : InstRW<[SwiftWrite3xP1TwoCycle],
   1598         (instregex "VTB(L|X)3")>;
   1599   def : InstRW<[SwiftWrite4xP1TwoCycle],
   1600         (instregex "VTB(L|X)4")>;
   1601 
   1602   // 4.2.33 Advanced SIMD, Transpose (two writes, one resource-only entry, one read)
   1603   def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
   1604                 SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
   1605         (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
   1606 
   1607   // 4.2.34 Advanced SIMD and VFP, Floating Point (VRECPS/VRSQRTS: 8 cycles on P1)
   1608   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
   1609   def : InstRW<[SwiftWriteP0FourCycle],
   1610         (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
   1611   def : InstRW<[SwiftWriteP0FourCycle],
   1612         (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
   1613                    "VPMIN")>;
   1614   def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
   1615   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
   1616 
   1617   // 4.2.35 Advanced SIMD and VFP, Multiply (VFMA/VFMS fd: 8 cycles, fq: 12 cycles)
   1618   def : InstRW<[SwiftWriteP1FourCycle],
   1619         (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
   1620                    "VMULL", "VQDMULL")>;
   1621   def : InstRW<[SwiftWriteP1SixCycle],
   1622         (instregex "VMULD", "VNMULD")>;
   1623   def : InstRW<[SwiftWriteP1FourCycle],
   1624         (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
   1625         "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
   1626   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
   1627   def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
   1628 
   1629   // 4.2.36 Advanced SIMD and VFP, Convert
   1630   def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
   1631   // Fixpoint conversions: WriteCvtFP uses SwiftUnitP1 with a 4-cycle latency.
   1632   def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
   1633 
   1634   // 4.2.37 Advanced SIMD and VFP, Move (several forms use multi-write sequences)
   1635   def : InstRW<[SwiftWriteP0TwoCycle],
   1636         (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
   1637                    "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
   1638                    "FCONST(D|S)")>;
   1639   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
   1640   def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
   1641         (instregex "VQMOVN")>;
   1642   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
   1643   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
   1644         (instregex "VDUP(8|16|32)")>;
   1645   def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
   1646   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
   1647         (instregex "VMOVSR$", "VSETLN")>;
   1648   def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
   1649         (instregex "VMOVRR(D|S)$")>;
   1650   def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
   1651   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
   1652                 WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
   1653                                SwiftWriteP1TwoCycle]>],
   1654                 (instregex "VMOVSRR$")>;
   1655   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
   1656         (instregex "VGETLN(u|i)")>;
   1657   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
   1658                                SwiftWriteP01OneCycle]>],
   1659         (instregex "VGETLNs")>; // Same as VGETLN(u|i) plus one P01 write.
   1660 
   1661   // 4.2.38 Advanced SIMD and VFP, Move FPSCR
   1662   // Serializing instructions: P0, P1 and P2 are each held for 15 cycles.
   1663   def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
   1664     let Latency = 15;
   1665     let ResourceCycles = [15];
   1666   }
   1667   def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
   1668     let Latency = 15;
   1669     let ResourceCycles = [15];
   1670   }
   1671   def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
   1672     let Latency = 15;
   1673     let ResourceCycles = [15];
   1674   }
   1675   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
   1676         (instregex "VMRS")>;
   1677   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
   1678         (instregex "VMSR")>;
   1679   // Not serializing: FMSTAT gets an ordinary two-cycle P0 write.
   1680   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
   1681 
   1682   // 4.2.39 Advanced SIMD and VFP, Load Single Element
   1683   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
   1684 
   1685   // 4.2.40 Advanced SIMD and VFP, Store Single Element
   1686   def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>; // NOTE(review): load write reused; confirm.
   1687 
   1688   // 4.2.41 Advanced SIMD and VFP, Load Multiple
   1689   // 4.2.42 Advanced SIMD and VFP, Store Multiple
   1690 
   1691   // Resource requirement for permuting, just reserves the resources.
   1692   foreach Num = 1-28 in { // SwiftVLDMPerm<N>: N micro-ops, P1 held for N cycles.
   1693     def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
   1694       let Latency = 0;
   1695       let NumMicroOps = Num;
   1696       let ResourceCycles = [Num];
   1697     }
   1698   }
   1699 
   1700   // Pre RA pseudos - load/store to a Q register as a D register pair.
   1701   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
   1702 
   1703   // Post RA not modelled accurately. We assume that register use of width 64
   1704   // bit maps to a D register, 128 maps to a Q register. Not all different kinds
   1705   // are accurately represented.
   1706   def SwiftWriteVLDM : SchedWriteVariant<[
   1707     // Load of one S register.
   1708     SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
   1709     // Load of one D register.
   1710     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
   1711     // Load of 3 S registers.
   1712     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1713                                 SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
   1714                                 SwiftVLDMPerm3]>,
   1715     // Load of a Q register (not necessarily true). We should not be mapping to
   1716     // 4 S registers, either.
   1717     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
   1718                                 SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
   1719     // Load of 5 S registers.
   1720     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1721                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
   1722                                 SwiftWriteLM17CyNo,  SwiftWriteP01OneCycle,
   1723                                 SwiftVLDMPerm5]>,
   1724     // Load of 3 D registers. (Must also be able to handle s register list -
   1725     // though, not accurate)
   1726     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1727                                 SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
   1728                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1729                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
   1730     // Load of 7 S registers.
   1731     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1732                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1733                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1734                                 SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
   1735                                 SwiftVLDMPerm7]>,
   1736     // Load of two Q registers.
   1737     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1738                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
   1739                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1740                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1741                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>,
   1742     // Load of 9 S registers.
   1743     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1744                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1745                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1746                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1747                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1748                                 SwiftVLDMPerm9]>,
   1749     // Load of 5 D registers.
   1750     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1751                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
   1752                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1753                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1754                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
   1755                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
   1756     // Inaccurate: reuse describtion from 9 S registers.
   1757     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1758                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1759                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1760                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1761                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1762                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1763                                 SwiftVLDMPerm9]>,
   1764     // Load of three Q registers.
   1765     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1766                                 SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   1767                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1768                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1769                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1770                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1771                                 SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
   1772     // Inaccurate: reuse describtion from 9 S registers.
   1773     SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1774                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1775                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1776                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1777                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1778                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1779                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1780                                 SwiftVLDMPerm9]>,
   1781     // Load of 7 D registers inaccurate.
   1782     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1783                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
   1784                                 SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
   1785                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1786                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
   1787                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
   1788                                 SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
   1789     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1790                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1791                                 SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
   1792                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1793                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1794                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1795                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1796                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1797                                 SwiftVLDMPerm9]>,
   1798     // Load of 4 Q registers.
   1799     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
   1800                                 SwiftWriteLM11Cy, SwiftWriteLM14Cy,
   1801                                 SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
   1802                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1803                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1804                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1805                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1806                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1807                                 SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
   1808     // Unknow number of registers, just use resources for two registers.
   1809     SchedVar<NoSchedPred,      [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1810                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
   1811                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1812                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1813                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1814                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1815                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1816                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1817                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1818                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1819                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1820                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1821                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1822                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1823                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1824                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1825                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>
   1826   ]> { let Variadic = 1; }
   1827 
   1828   def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
   1829 
   1830   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
   1831         (instregex "VLDM[SD](IA|DB)_UPD$")>;
   1832 
   1833   def SwiftWriteVSTM : SchedWriteVariant<[
   1834     // One S register.
   1835     SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
   1836     // One D register.
   1837     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
   1838     // Three S registers.
   1839     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
   1840     // Assume one Q register.
   1841     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
   1842     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
   1843     // Assume three D registers.
   1844     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
   1845     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
   1846     // Assume two Q registers.
   1847     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
   1848     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
   1849     // Assume 5 D registers.
   1850     SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
   1851     SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
   1852     // Assume three Q registers.
   1853     SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
   1854     SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
   1855     // Assume 7 D registers.
   1856     SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
   1857     SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
   1858     // Assume four Q registers.
   1859     SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
   1860     // Assume two Q registers.
   1861     SchedVar<NoSchedPred, [SwiftWriteSTM3]>
   1862   ]> { let Variadic = 1; }
   1863 
   1864   def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
   1865 
   1866   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
   1867         (instregex "VSTM[SD](IA|DB)_UPD")>;
   1868 
   1869   // 4.2.43 Advanced SIMD, Element or Structure Load and Store
   1870   def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
   1871     let ResourceCycles = [2]; // Cycles charged to SwiftUnitP2.
   1872     let Latency = 4;
   1873   }
   1874   def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
   1875     let ResourceCycles = [3]; // Cycles charged to SwiftUnitP2.
   1876     let Latency = 4;
   1877   }
   1878   foreach Num = [1, 2] in {
   1879     def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> { // Num = 1, 2
   1880       let NumMicroOps = Num;
   1881       let ResourceCycles = [Num];
   1882       let Latency = 0;
   1883     }
   1884   }
   1885   // VLDx
   1886   // Multiple structures.
   1887   // Single element structure loads.
   1888   // We assume aligned.
   1889   // Single/two register.
   1890   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
   1891   def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
   1892         (instregex "VLD1(d|q)(8|16|32|64)wb")>;
   1893   // Three register.
   1894   def : InstRW<[SwiftWrite3xP2FourCy],
   1895         (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
   1896   def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
   1897         (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
   1898   // Four register.
   1899   def : InstRW<[SwiftWrite2xP2FourCy],
   1900         (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
   1901   def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
   1902         (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
   1903   // Two element structure loads.
   1904   // Two/four register.
   1905   def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
   1906         (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
   1907   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
   1908                 SwiftVLDMPerm2],
   1909         (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
   1910   // Three element structure.
   1911   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
   1912                 SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
   1913         (instregex "VLD3(d|q)(8|16|32)$")>;
   1914   def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
   1915         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
   1916 
   1917   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
   1918                 SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
   1919         (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
   1920   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
   1921                 SwiftWrite3xP2FourCy],
   1922         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
   1923   // Four element structure loads.
   1924   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   1925                 SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
   1926                 SwiftWrite3xP2FourCy],
   1927         (instregex "VLD4(d|q)(8|16|32)$")>;
   1928   def : InstRW<[SwiftWriteLM11Cy,  SwiftExt2xP0, SwiftVLDMPerm4,
   1929                 SwiftWrite3xP2FourCy],
   1930         (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
   1931   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   1932                 SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
   1933                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
   1934         (instregex "VLD4(d|q)(8|16|32)_UPD")>;
   1935   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
   1936                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
   1937         (instregex  "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
   1938 
   1939   // Single all/lane loads.
   1940   // One element structure.
   1941   def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
   1942         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
   1943   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
   1944         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
   1945                   "VLD1LNq(8|16|32)Pseudo_UPD")>;
   1946   // Two element structure.
   1947   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
   1948         (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
   1949                    "VLD2LN(d|q)(8|16|32)Pseudo$")>;
   1950   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
   1951                 SwiftExt1xP0, SwiftVLDMPerm2],
   1952         (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
   1953   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
   1954                 SwiftExt1xP0, SwiftVLDMPerm2],
   1955         (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
   1956   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
   1957                 SwiftExt1xP0, SwiftVLDMPerm2],
   1958         (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
   1959   // Three element structure.
   1960   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
   1961                 SwiftVLDMPerm3],
   1962         (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
   1963                    "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
   1964   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
   1965                 SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
   1966         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
   1967   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
   1968                 SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
   1969         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
   1970   // Four element structure.
   1971   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
   1972                 SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
   1973         (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
   1974                    "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
   1975   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
   1976                 SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
   1977                 SwiftVLDMPerm5],
   1978         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
   1979   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
   1980                 SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
   1981                 SwiftVLDMPerm5],
   1982         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
   1983   // VSTx
   1984   // Multiple structures.
   1985   // Single element structure store. NOTE(review): d/q/T/Q look like 1-4 regs.
   1986   def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
   1987   def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
   1988   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
   1989         (instregex "VST1d(8|16|32|64)wb")>;
   1990   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
   1991         (instregex "VST1q(8|16|32|64)wb")>;
   1992   def : InstRW<[SwiftWrite3xP2],
   1993         (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
   1994   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
   1995         (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
   1996   def : InstRW<[SwiftWrite4xP2],
   1997         (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
   1998   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
   1999         (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
   2000   // Two element structure store; wb forms prepend SwiftWriteP01OneCycle.
   2001   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
   2002         (instregex "VST2(d|b)(8|16|32)$")>;
   2003   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
   2004         (instregex "VST2(b|d)(8|16|32)wb")>;
   2005   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
   2006         (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
   2007   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
   2008         (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
   2009   // Three element structure store.
   2010   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
   2011         (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
   2012   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
   2013         (instregex "VST3(d|q)(8|16|32)_UPD",
   2014                    "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
   2015   // Four element structure store. NOTE(review): Perm2 vs Perm4 for _UPD?
   2016   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
   2017         (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
   2018   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
   2019         (instregex "VST4(d|q)(8|16|32)_UPD",
   2020                    "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
   2021   // Single/all lane store.
   2022   // One element structure.
   2023   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
   2024         (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
   2025   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
   2026         (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
   2027   // Two element structure.
   2028   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
   2029         (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
   2030   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
   2031         (instregex "VST2LN(d|q)(8|16|32)_UPD",
   2032                    "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
   2033   // Three element structure.
   2034   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
   2035         (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
   2036   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
   2037         (instregex "VST3LN(d|q)(8|16|32)_UPD",
   2038                    "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
   2039   // Four element structure.
   2040   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
   2041         (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
   2042   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
   2043         (instregex "VST4LN(d|q)(8|16|32)_UPD",
   2044                    "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
   2045 
   2046   // 4.2.44 VFP, Divide and Square Root
   2047   def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
   2048     let Latency = 17;
   2049     let ResourceCycles = [1, 15]; // [SwiftUnitP0, SwiftUnitDiv]
   2050     let NumMicroOps = 1;
   2051   }
   2052   def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
   2053     let Latency = 32;
   2054     let ResourceCycles = [1, 30]; // [SwiftUnitP0, SwiftUnitDiv]
   2055     let NumMicroOps = 1;
   2056   }
   2057   def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
   2058   def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
   2059 
   2060   // Not specified.
   2061   def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
   2062   // Preload.
   2063   def : WriteRes<WritePreLd, [SwiftUnitP2]> {
   2064     let ResourceCycles = [0]; let Latency = 0;
   2065   }
   2066 
   2067 }
   2068