Home | History | Annotate | Download | only in ARM
      1 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the itinerary class data for the Swift processor.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 // ===---------------------------------------------------------------------===//
     15 // This section contains legacy support for itineraries. This is
     16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
     17 
     18 def SW_DIS0 : FuncUnit; // First stage of every itinerary below (presumably dispatch slot 0).
     19 def SW_DIS1 : FuncUnit; // Alternative to SW_DIS0, or claimed alongside it by multi-stage entries.
     20 def SW_DIS2 : FuncUnit; // Third unit of the same group; claimed by three-way entries such as IIC_iMUL64.
     21 
     22 def SW_ALU0 : FuncUnit; // Only ALU listed for IIC_iMUL16/32, IIC_iDIV and the FP unary/compare/ALU entries.
     23 def SW_ALU1 : FuncUnit; // Only ALU listed for the FP convert and FP multiply/MAC entries.
     24 def SW_LS   : FuncUnit; // Used by the load and store itinerary stages.
     25 def SW_IDIV : FuncUnit; // Held for 14 cycles by IIC_iDIV.
     26 def SW_FDIV : FuncUnit; // Presumably the FP divide/sqrt unit; not used in the entries visible here -- confirm.
     27 
     28 // FIXME: Need bypasses.
     29 // FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
     30 //        IIC_iMOVix2ld better.
     31 // FIXME: Model the special immediate shifts that are not microcoded.
     32 // FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
     33 //        to issue on pipe 1?
     34 // FIXME: Model the pipelined behavior of CMP / TST instructions.
     35 // FIXME: Better model the microcode stages of multiply instructions, especially
     36 //        conditional variants.
     37 // FIXME: Add preload instruction when it is documented.
     38 // FIXME: Model non-pipelined nature of FP div / sqrt unit.
     39 
     40 def SwiftItineraries : ProcessorItineraries<
     41   [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
     42   //
     43   // Move instructions, unconditional
     44   InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     45                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     46                               [1]>,
     47   InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     48                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     49                               [1]>,
     50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     51                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     52                               [1]>,
     53   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     54                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     55                               [1]>,
     56   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     57                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     58                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     59                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     60                               [2]>,
     61   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     62                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
     63                                   InstrStage<1, [SW_ALU0, SW_ALU1]>,
     64                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
     65                                  [3]>,
     66   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     67                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     68                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
     69                                InstrStage<1, [SW_LS]>],
     70                               [5]>,
     71   //
     72   // MVN instructions
     73   InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     74                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     75                               [1]>,
     76   InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     77                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     78                               [1]>,
     79   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     80                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     81                               [1]>,
     82   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     83                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
     84                               [1]>,
     85   //
     86   // No operand cycles
     87   InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     88                                InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
     89   //
     90   // Binary Instructions that produce a result
     91   InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     92                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     93                             [1, 1]>,
     94   InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     95                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     96                             [1, 1, 1]>,
     97   InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
     98                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
     99                             [2, 1, 1]>,
    100   InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    101                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    102                             [2, 1, 1]>,
    103   InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    104                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    105                             [2, 1, 1, 1]>,
    106   //
    107   // Bitwise Instructions that produce a result
    108   InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    109                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    110                             [1, 1]>,
    111   InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    112                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    113                             [1, 1, 1]>,
    114   InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    115                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    116                             [2, 1, 1]>,
    117   InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    118                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    119                             [2, 1, 1, 1]>,
    120   //
    121   // Unary Instructions that produce a result
    122 
    123   // CLZ, RBIT, etc.
    124   InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    125                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    126                             [1, 1]>,
    127 
    128   // BFC, BFI, UBFX, SBFX
    129   InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    130                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    131                             [2, 1]>,
    132 
    133   //
    134   // Zero and sign extension instructions
    135   InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    136                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    137                             [1, 1]>,
    138   InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    139                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    140                             [1, 1, 1]>,
    141   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    142                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
    143                             [1, 1, 1, 1]>,
    144   //
    145   // Compare instructions
    146   InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    147                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    148                               [1]>,
    149   InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    150                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    151                               [1, 1]>,
    152   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    153                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    154                               [1, 1]>,
    155   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    156                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    157                               [1, 1, 1]>,
    158   //
    159   // Test instructions
    160   InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    161                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    162                               [1]>,
    163   InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    164                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    165                               [1, 1]>,
    166   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    167                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    168                               [1, 1]>,
    169   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    170                                InstrStage<2, [SW_ALU0, SW_ALU1]>],
    171                               [1, 1, 1]>,
    172   //
    173   // Move instructions, conditional
    174   // FIXME: Correctly model the extra input dep on the destination.
    175   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    176                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    177                               [1]>,
    178   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    179                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    180                               [1, 1]>,
    181   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    182                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    183                               [1, 1]>,
    184   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    185                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    186                               [2, 1, 1]>,
    187   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    188                                InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    189                                InstrStage<1, [SW_ALU0, SW_ALU1]>,
    190                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    191                               [2]>,
    192 
    193   // Integer multiply pipeline
    194   //
    195   InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    196                                InstrStage<1, [SW_ALU0]>],
    197                               [3, 1, 1]>,
    198   InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    199                                InstrStage<1, [SW_ALU0]>],
    200                               [3, 1, 1, 1]>,
    201   InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    202                                InstrStage<1, [SW_ALU0]>],
    203                               [4, 1, 1]>,
    204   InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    205                                InstrStage<1, [SW_ALU0]>],
    206                               [4, 1, 1, 1]>,
    207   InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
    208                                InstrStage<1, [SW_DIS1], 0>,
    209                                InstrStage<1, [SW_DIS2], 0>,
    210                                InstrStage<1, [SW_ALU0], 1>,
    211                                InstrStage<1, [SW_ALU0], 3>,
    212                                InstrStage<1, [SW_ALU0]>],
    213                               [5, 5, 1, 1]>,
    214   InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
    215                                InstrStage<1, [SW_DIS1], 0>,
    216                                InstrStage<1, [SW_DIS2], 0>,
    217                                InstrStage<1, [SW_ALU0], 1>,
    218                                InstrStage<1, [SW_ALU0], 1>,
    219                                InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
    220                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    221                               [5, 6, 1, 1]>,
    222   //
    223   // Integer divide
    224   InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    225                              InstrStage<1, [SW_ALU0], 0>,
    226                              InstrStage<14, [SW_IDIV]>],
    227                             [14, 1, 1]>,
    228 
    229   // Integer load pipeline
    230   // FIXME: The timings are some rough approximations
    231   //
    232   // Immediate offset
    233   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    234                                  InstrStage<1, [SW_LS]>],
    235                                 [3, 1]>,
    236   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    237                                  InstrStage<1, [SW_LS]>],
    238                                 [3, 1]>,
    239   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
    240                                  InstrStage<1, [SW_DIS1], 0>,
    241                                  InstrStage<1, [SW_LS], 1>,
    242                                  InstrStage<1, [SW_LS]>],
    243                                 [3, 4, 1]>,
    244   //
    245   // Register offset
    246   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    247                                  InstrStage<1, [SW_LS]>],
    248                                 [3, 1, 1]>,
    249   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    250                                  InstrStage<1, [SW_LS]>],
    251                                 [3, 1, 1]>,
    252   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
    253                                  InstrStage<1, [SW_DIS1], 0>,
    254                                  InstrStage<1, [SW_DIS2], 0>,
    255                                  InstrStage<1, [SW_LS], 1>,
    256                                  InstrStage<1, [SW_LS], 3>,
    257                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    258                                 [3, 4, 1, 1]>,
    259   //
    260   // Scaled register offset
    261   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
    262                                  InstrStage<1, [SW_DIS1], 0>,
    263                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    264                                  InstrStage<1, [SW_LS]>],
    265                                 [5, 1, 1]>,
    266   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
    267                                  InstrStage<1, [SW_DIS1], 0>,
    268                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    269                                  InstrStage<1, [SW_LS]>],
    270                                 [5, 1, 1]>,
    271   //
    272   // Immediate offset with update
    273   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
    274                                  InstrStage<1, [SW_DIS1], 0>,
    275                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    276                                  InstrStage<1, [SW_LS]>],
    277                                 [3, 1, 1]>,
    278   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
    279                                  InstrStage<1, [SW_DIS1], 0>,
    280                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    281                                  InstrStage<1, [SW_LS]>],
    282                                 [3, 1, 1]>,
    283   //
    284   // Register offset with update
    285   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
    286                                  InstrStage<1, [SW_DIS1], 0>,
    287                                  InstrStage<1, [SW_ALU0], 1>,
    288                                  InstrStage<1, [SW_LS]>],
    289                                 [3, 1, 1, 1]>,
    290   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
    291                                  InstrStage<1, [SW_DIS1], 0>,
    292                                  InstrStage<1, [SW_ALU0], 1>,
    293                                  InstrStage<1, [SW_LS]>],
    294                                 [3, 1, 1, 1]>,
    295   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
    296                                  InstrStage<1, [SW_DIS1], 0>,
    297                                  InstrStage<1, [SW_DIS2], 0>,
    298                                  InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
    299                                  InstrStage<1, [SW_LS], 3>,
    300                                  InstrStage<1, [SW_LS], 0>,
    301                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    302                                 [3, 4, 1, 1]>,
    303   //
    304   // Scaled register offset with update
    305   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
    306                                  InstrStage<1, [SW_DIS1], 0>,
    307                                  InstrStage<1, [SW_DIS2], 0>,
    308                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    309                                  InstrStage<1, [SW_LS], 3>,
    310                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
    311                                 [5, 3, 1, 1]>,
    312   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
    313                                   InstrStage<1, [SW_DIS1], 0>,
    314                                   InstrStage<1, [SW_DIS2], 0>,
    315                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    316                                   InstrStage<1, [SW_LS], 0>,
    317                                   InstrStage<1, [SW_ALU0, SW_ALU1]>],
    318                                 [5, 3, 1, 1]>,
    319   //
    320   // Load multiple, def is the 5th operand.
    321   // FIXME: This assumes 3 to 4 registers.
    322   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
    323                                 InstrStage<1, [SW_DIS1], 0>,
    324                                 InstrStage<1, [SW_DIS2], 0>,
    325                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    326                                 InstrStage<1, [SW_LS]>],
    327                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
    328 
    329   //
    330   // Load multiple + update, defs are the 1st and 5th operands.
    331   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
    332                                 InstrStage<1, [SW_DIS1], 0>,
    333                                 InstrStage<1, [SW_DIS2], 0>,
    334                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
    335                                 InstrStage<1, [SW_LS], 3>,
    336                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    337                                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
    338   //
    339   // Load multiple plus branch
    340   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
    341                                 InstrStage<1, [SW_DIS1], 0>,
    342                                 InstrStage<1, [SW_DIS2], 0>,
    343                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    344                                 InstrStage<1, [SW_LS]>],
    345                                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
    346   //
    347   // Pop, def is the 3rd operand.
    348   InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
    349                                 InstrStage<1, [SW_DIS1], 0>,
    350                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    351                                 InstrStage<1, [SW_LS]>],
    352                                [1, 1, 3], [], -1>, // dynamic uops
    353   //
    354   // Pop + branch, def is the 3rd operand.
    355   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
    356                                 InstrStage<1, [SW_DIS1], 0>,
    357                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    358                                 InstrStage<1, [SW_LS]>],
    359                                [1, 1, 3], [], -1>, // dynamic uops
    360 
    361   //
    362   // iLoadi + iALUr for t2LDRpci_pic.
    363   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    364                                 InstrStage<1, [SW_LS], 3>,
    365                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    366                                [4, 1]>,
    367 
    368   // Integer store pipeline
    369   //
    370   // Immediate offset
    371   InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    372                                  InstrStage<1, [SW_LS]>],
    373                                 [1, 1]>,
    374   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    375                                  InstrStage<1, [SW_LS]>],
    376                                 [1, 1]>,
    377   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
    378                                  InstrStage<1, [SW_DIS1], 0>,
    379                                  InstrStage<1, [SW_DIS2], 0>,
    380                                  InstrStage<1, [SW_LS], 0>,
    381                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    382                                  InstrStage<1, [SW_LS]>],
    383                                 [1, 1]>,
    384   //
    385   // Register offset
    386   InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    387                                  InstrStage<1, [SW_LS]>],
    388                                 [1, 1, 1]>,
    389   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    390                                  InstrStage<1, [SW_LS]>],
    391                                 [1, 1, 1]>,
    392   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
    393                                  InstrStage<1, [SW_DIS1], 0>,
    394                                  InstrStage<1, [SW_DIS2], 0>,
    395                                  InstrStage<1, [SW_LS], 0>,
    396                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    397                                  InstrStage<1, [SW_LS]>],
    398                                 [1, 1, 1]>,
    399   //
    400   // Scaled register offset
    401   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
    402                                   InstrStage<1, [SW_DIS1], 0>,
    403                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    404                                   InstrStage<1, [SW_LS]>],
    405                                  [1, 1, 1]>,
    406   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
    407                                   InstrStage<1, [SW_DIS1], 0>,
    408                                   InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    409                                   InstrStage<1, [SW_LS]>],
    410                                  [1, 1, 1]>,
    411   //
    412   // Immediate offset with update
    413   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
    414                                   InstrStage<1, [SW_DIS1], 0>,
    415                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    416                                   InstrStage<1, [SW_LS]>],
    417                                  [1, 1, 1]>,
    418   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
    419                                   InstrStage<1, [SW_DIS1], 0>,
    420                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    421                                   InstrStage<1, [SW_LS]>],
    422                                  [1, 1, 1]>,
    423   //
    424   // Register offset with update
    425   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
    426                                   InstrStage<1, [SW_DIS1], 0>,
    427                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    428                                   InstrStage<1, [SW_LS]>],
    429                                  [1, 1, 1, 1]>,
    430   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
    431                                   InstrStage<1, [SW_DIS1], 0>,
    432                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    433                                   InstrStage<1, [SW_LS]>],
    434                                  [1, 1, 1, 1]>,
    435   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
    436                                   InstrStage<1, [SW_DIS1], 0>,
    437                                   InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    438                                   InstrStage<1, [SW_LS]>],
    439                                  [1, 1, 1, 1]>,
    440   //
    441   // Scaled register offset with update
    442   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
    443                                     InstrStage<1, [SW_DIS1], 0>,
    444                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    445                                     InstrStage<1, [SW_LS], 0>,
    446                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
    447                                    [3, 1, 1, 1]>,
    448   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
    449                                     InstrStage<1, [SW_DIS1], 0>,
    450                                     InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
    451                                     InstrStage<1, [SW_LS], 0>,
    452                                     InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
    453                                    [3, 1, 1, 1]>,
    454   //
    455   // Store multiple
    456   InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
    457                                 InstrStage<1, [SW_DIS1], 0>,
    458                                 InstrStage<1, [SW_DIS2], 0>,
    459                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    460                                 InstrStage<1, [SW_LS], 1>,
    461                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    462                                 InstrStage<1, [SW_LS], 1>,
    463                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    464                                 InstrStage<1, [SW_LS]>],
    465                                 [], [], -1>, // dynamic uops
    466   //
    467   // Store multiple + update
    468   InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
    469                                 InstrStage<1, [SW_DIS1], 0>,
    470                                 InstrStage<1, [SW_DIS2], 0>,
    471                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    472                                 InstrStage<1, [SW_LS], 1>,
    473                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    474                                 InstrStage<1, [SW_LS], 1>,
    475                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
    476                                 InstrStage<1, [SW_LS]>],
    477                                [2], [], -1>, // dynamic uops
    478 
    479   //
    480   // Preload
    481   InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
    482 
    483   // Branch
    484   //
    485   // no delay slots, so the latency of a branch is unimportant
    486   InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
    487 
    488   // FP Special Register to Integer Register File Move
    489   InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    490                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
    491                              [1]>,
    492   //
    493   // Single-precision FP Unary
    494   //
    495   // Most floating-point moves get issued on ALU0.
    496   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    497                                InstrStage<1, [SW_ALU0]>],
    498                               [2, 1]>,
    499   //
    500   // Double-precision FP Unary
    501   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    502                                InstrStage<1, [SW_ALU0]>],
    503                               [2, 1]>,
    504 
    505   //
    506   // Single-precision FP Compare
    507   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    508                                InstrStage<1, [SW_ALU0]>],
    509                               [1, 1]>,
    510   //
    511   // Double-precision FP Compare
    512   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    513                                InstrStage<1, [SW_ALU0]>],
    514                               [1, 1]>,
    515   //
    516   // Single to Double FP Convert
    517   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    518                                InstrStage<1, [SW_ALU1]>],
    519                               [4, 1]>,
    520   //
    521   // Double to Single FP Convert
    522   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    523                                InstrStage<1, [SW_ALU1]>],
    524                               [4, 1]>,
    525 
    526   //
    527   // Single to Half FP Convert
    528   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
    529                                InstrStage<1, [SW_DIS1], 0>,
    530                                InstrStage<1, [SW_ALU1], 4>,
    531                                InstrStage<1, [SW_ALU1]>],
    532                               [6, 1]>,
    533   //
    534   // Half to Single FP Convert
    535   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    536                                InstrStage<1, [SW_ALU1]>],
    537                               [4, 1]>,
    538 
    539   //
    540   // Single-Precision FP to Integer Convert
    541   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    542                                InstrStage<1, [SW_ALU1]>],
    543                               [4, 1]>,
    544   //
    545   // Double-Precision FP to Integer Convert
    546   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    547                                InstrStage<1, [SW_ALU1]>],
    548                               [4, 1]>,
    549   //
    550   // Integer to Single-Precision FP Convert
    551   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    552                                InstrStage<1, [SW_ALU1]>],
    553                               [4, 1]>,
    554   //
    555   // Integer to Double-Precision FP Convert
    556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    557                                InstrStage<1, [SW_ALU1]>],
    558                               [4, 1]>,
    559   //
    560   // Single-precision FP ALU
    561   InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    562                                InstrStage<1, [SW_ALU0]>],
    563                               [2, 1, 1]>,
    564   //
    565   // Double-precision FP ALU
    566   InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    567                                InstrStage<1, [SW_ALU0]>],
    568                               [2, 1, 1]>,
    569   //
    570   // Single-precision FP Multiply
    571   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    572                                InstrStage<1, [SW_ALU1]>],
    573                               [4, 1, 1]>,
    574   //
    575   // Double-precision FP Multiply
    576   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    577                                InstrStage<1, [SW_ALU1]>],
    578                               [6, 1, 1]>,
    579   //
    580   // Single-precision FP MAC
    581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    582                                InstrStage<1, [SW_ALU1]>],
    583                               [8, 1, 1]>,
    584   //
    585   // Double-precision FP MAC
    586   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    587                                InstrStage<1, [SW_ALU1]>],
    588                               [12, 1, 1]>,
    589   //
    590   // Single-precision Fused FP MAC
    591   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    592                                InstrStage<1, [SW_ALU1]>],
    593                               [8, 1, 1]>,
    594   //
    595   // Double-precision Fused FP MAC
    596   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    597                                InstrStage<1, [SW_ALU1]>],
    598                               [12, 1, 1]>,
    599   //
    600   // Single-precision FP DIV
    601   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    602                                InstrStage<1, [SW_ALU1], 0>,
    603                                InstrStage<15, [SW_FDIV]>],
    604                               [17, 1, 1]>,
    605   //
    606   // Double-precision FP DIV
    607   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    608                                InstrStage<1, [SW_ALU1], 0>,
    609                                InstrStage<30, [SW_FDIV]>],
    610                               [32, 1, 1]>,
    611   //
    612   // Single-precision FP SQRT
    613   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    614                                InstrStage<1, [SW_ALU1], 0>,
    615                                InstrStage<15, [SW_FDIV]>],
    616                               [17, 1]>,
    617   //
    618   // Double-precision FP SQRT
    619   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    620                                InstrStage<1, [SW_ALU1], 0>,
    621                                InstrStage<30, [SW_FDIV]>],
    622                               [32, 1, 1]>,
    623 
    624   //
    625   // Integer to Single-precision Move
    626   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
    627                                InstrStage<1, [SW_DIS1], 0>,
    628                                InstrStage<1, [SW_LS], 4>,
    629                                InstrStage<1, [SW_ALU0]>],
    630                               [6, 1]>,
    631   //
    632   // Integer to Double-precision Move
    633   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    634                                InstrStage<1, [SW_LS]>],
    635                               [4, 1]>,
    636   //
    637   // Single-precision to Integer Move
    638   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    639                                InstrStage<1, [SW_LS]>],
    640                               [3, 1]>,
    641   //
    642   // Double-precision to Integer Move
    643   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
    644                                InstrStage<1, [SW_DIS1], 0>,
    645                                InstrStage<1, [SW_LS], 3>,
    646                                InstrStage<1, [SW_LS]>],
    647                               [3, 4, 1]>,
    648   //
    649   // Single-precision FP Load
    650   InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    651                                InstrStage<1, [SW_LS]>],
    652                               [4, 1]>,
    653   //
    654   // Double-precision FP Load
    655   InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    656                                InstrStage<1, [SW_LS]>],
    657                               [4, 1]>,
    658   //
    659   // FP Load Multiple
    660   // FIXME: Assumes a single Q register.
    661   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    662                                InstrStage<1, [SW_LS]>],
    663                               [1, 1, 1, 4], [], -1>, // dynamic uops
    664   //
    665   // FP Load Multiple + update
    666   // FIXME: Assumes a single Q register.
    667   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
    668                                InstrStage<1, [SW_DIS1], 0>,
    669                                InstrStage<1, [SW_LS], 4>,
    670                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
    671                               [2, 1, 1, 1, 4], [], -1>, // dynamic uops
    672   //
    673   // Single-precision FP Store
    674   InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    675                                InstrStage<1, [SW_LS]>],
    676                               [1, 1]>,
    677   //
    678   // Double-precision FP Store
    679   InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    680                                InstrStage<1, [SW_LS]>],
    681                               [1, 1]>,
    682   //
    683   // FP Store Multiple
    684   // FIXME: Assumes a single Q register.
    685   InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    686                                InstrStage<1, [SW_LS]>],
    687                               [1, 1, 1], [], -1>, // dynamic uops
    688   //
    689   // FP Store Multiple + update
    690   // FIXME: Assumes a single Q register.
    691   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
    692                                 InstrStage<1, [SW_DIS1], 0>,
    693                                 InstrStage<1, [SW_LS], 4>,
    694                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
    695                                [2, 1, 1, 1], [], -1>, // dynamic uops
    696   // NEON
    697   //
    698   // Double-register Integer Unary
    699   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    700                                InstrStage<1, [SW_ALU0]>],
    701                               [4, 1]>,
    702   //
    703   // Quad-register Integer Unary
    704   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    705                                InstrStage<1, [SW_ALU0]>],
    706                               [4, 1]>,
    707   //
    708   // Double-register Integer Q-Unary
    709   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    710                                InstrStage<1, [SW_ALU0]>],
    711                               [4, 1]>,
    712   //
    713   // Quad-register Integer Q-Unary
    714   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    715                                InstrStage<1, [SW_ALU0]>],
    716                               [4, 1]>,
    717   //
    718   // Double-register Integer Binary
    719   InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    720                                InstrStage<1, [SW_ALU0]>],
    721                               [2, 1, 1]>,
    722   //
    723   // Quad-register Integer Binary
    724   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    725                                InstrStage<1, [SW_ALU0]>],
    726                               [2, 1, 1]>,
    727   //
    728   // Double-register Integer Subtract
    729   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    730                                InstrStage<1, [SW_ALU0]>],
    731                               [2, 1, 1]>,
    732   //
    733   // Quad-register Integer Subtract
    734   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    735                                InstrStage<1, [SW_ALU0]>],
    736                               [2, 1, 1]>,
    737   //
    738   // Double-register Integer Shift
    739   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    740                                InstrStage<1, [SW_ALU0]>],
    741                               [2, 1, 1]>,
    742   //
    743   // Quad-register Integer Shift
    744   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    745                                InstrStage<1, [SW_ALU0]>],
    746                               [2, 1, 1]>,
    747   //
    748   // Double-register Integer Shift (4 cycle)
    749   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    750                                InstrStage<1, [SW_ALU0]>],
    751                               [4, 1, 1]>,
    752   //
    753   // Quad-register Integer Shift (4 cycle)
    754   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    755                                InstrStage<1, [SW_ALU0]>],
    756                               [4, 1, 1]>,
    757   //
    758   // Double-register Integer Binary (4 cycle)
    759   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    760                                InstrStage<1, [SW_ALU0]>],
    761                               [4, 1, 1]>,
    762   //
    763   // Quad-register Integer Binary (4 cycle)
    764   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    765                                InstrStage<1, [SW_ALU0]>],
    766                               [4, 1, 1]>,
    767   //
    768   // Double-register Integer Subtract (4 cycle)
    769   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    770                                InstrStage<1, [SW_ALU0]>],
    771                               [4, 1, 1]>,
    772   //
    773   // Quad-register Integer Subtract (4 cycle)
    774   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    775                                InstrStage<1, [SW_ALU0]>],
    776                               [4, 1, 1]>,
    777 
    778   //
    779   // Double-register Integer Count
    780   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    781                                InstrStage<1, [SW_ALU0]>],
    782                               [2, 1, 1]>,
    783   //
    784   // Quad-register Integer Count
    785   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    786                                InstrStage<1, [SW_ALU0]>],
    787                               [2, 1, 1]>,
    788   //
    789   // Double-register Absolute Difference and Accumulate
    790   InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    791                                InstrStage<1, [SW_ALU0]>],
    792                               [4, 1, 1, 1]>,
    793   //
    794   // Quad-register Absolute Difference and Accumulate
    795   InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    796                                InstrStage<1, [SW_ALU0]>],
    797                               [4, 1, 1, 1]>,
    798   //
    799   // Double-register Integer Pair Add Long
    800   InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    801                                InstrStage<1, [SW_ALU0]>],
    802                               [4, 1, 1]>,
    803   //
    804   // Quad-register Integer Pair Add Long
    805   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    806                                InstrStage<1, [SW_ALU0]>],
    807                               [4, 1, 1]>,
    808 
    809   //
    810   // Double-register Integer Multiply (.8, .16)
    811   InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    812                                InstrStage<1, [SW_ALU1]>],
    813                               [4, 1, 1]>,
    814   //
    815   // Quad-register Integer Multiply (.8, .16)
    816   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    817                                InstrStage<1, [SW_ALU1]>],
    818                               [4, 1, 1]>,
    819 
    820   //
    821   // Double-register Integer Multiply (.32)
    822   InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    823                                InstrStage<1, [SW_ALU1]>],
    824                               [4, 1, 1]>,
    825   //
    826   // Quad-register Integer Multiply (.32)
    827   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    828                                InstrStage<1, [SW_ALU1]>],
    829                               [4, 1, 1]>,
    830   //
    831   // Double-register Integer Multiply-Accumulate (.8, .16)
    832   InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    833                                InstrStage<1, [SW_ALU1]>],
    834                               [4, 1, 1, 1]>,
    835   //
    836   // Double-register Integer Multiply-Accumulate (.32)
    837   InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    838                                InstrStage<1, [SW_ALU1]>],
    839                               [4, 1, 1, 1]>,
    840   //
    841   // Quad-register Integer Multiply-Accumulate (.8, .16)
    842   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    843                                InstrStage<1, [SW_ALU1]>],
    844                               [4, 1, 1, 1]>,
    845   //
    846   // Quad-register Integer Multiply-Accumulate (.32)
    847   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    848                                InstrStage<1, [SW_ALU1]>],
    849                               [4, 1, 1, 1]>,
    850 
    851   //
    852   // Move
    853   InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    854                                InstrStage<1, [SW_ALU0]>],
    855                               [2, 1]>,
    856   //
    857   // Move Immediate
    858   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    859                                InstrStage<1, [SW_ALU0]>],
    860                               [2]>,
    861   //
    862   // Double-register Permute Move
    863   InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    864                                InstrStage<1, [SW_ALU1]>],
    865                               [2, 1]>,
    866   //
    867   // Quad-register Permute Move
    868   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    869                                InstrStage<1, [SW_ALU1]>],
    870                               [2, 1]>,
    871   //
    872   // Integer to Single-precision Move
    873   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
    874                                InstrStage<1, [SW_DIS1], 0>,
    875                                InstrStage<1, [SW_LS], 4>,
    876                                InstrStage<1, [SW_ALU0]>],
    877                               [6, 1]>,
    878   //
    879   // Integer to Double-precision Move
    880   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    881                                InstrStage<1, [SW_LS]>],
    882                               [4, 1, 1]>,
    883   //
    884   // Single-precision to Integer Move
    885   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    886                                InstrStage<1, [SW_LS]>],
    887                               [3, 1]>,
    888   //
    889   // Double-precision to Integer Move
    890   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
    891                                InstrStage<1, [SW_DIS1], 0>,
    892                                InstrStage<1, [SW_LS], 3>,
    893                                InstrStage<1, [SW_LS]>],
    894                               [3, 4, 1]>,
    895   //
    896   // Integer to Lane Move
    897   // FIXME: I think this is correct, but it is not clear from the tuning guide.
    898   InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
    899                                InstrStage<1, [SW_DIS1], 0>,
    900                                InstrStage<1, [SW_LS], 4>,
    901                                InstrStage<1, [SW_ALU0]>],
    902                               [6, 1]>,
    903 
    904   //
    905   // Vector narrow move
    906   InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    907                                InstrStage<1, [SW_ALU1]>],
    908                               [2, 1]>,
    909   //
    910   // Double-register FP Unary
    911   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
    912   //        and they issue on a different pipeline.
    913   InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    914                                InstrStage<1, [SW_ALU0]>],
    915                               [2, 1]>,
    916   //
    917   // Quad-register FP Unary
    918   // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
    919   //        and they issue on a different pipeline.
    920   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    921                                InstrStage<1, [SW_ALU0]>],
    922                               [2, 1]>,
    923   //
    924   // Double-register FP Binary
    925   // FIXME: We're using this itin for many instructions.
    926   InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    927                                InstrStage<1, [SW_ALU0]>],
    928                               [4, 1, 1]>,
    929 
    930   //
    931   // VPADD, etc.
    932   InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    933                                InstrStage<1, [SW_ALU0]>],
    934                               [4, 1, 1]>,
    935   //
    936   // Double-register FP VMUL
    937   InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    938                                InstrStage<1, [SW_ALU1]>],
    939                               [4, 1, 1]>,
    940   //
    941   // Quad-register FP Binary
    942   InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    943                                InstrStage<1, [SW_ALU0]>],
    944                               [4, 1, 1]>,
    945   //
    946   // Quad-register FP VMUL
    947   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    948                                InstrStage<1, [SW_ALU1]>],
    949                               [4, 1, 1]>,
    950   //
    951   // Double-register FP Multiply-Accumulate
    952   InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    953                                InstrStage<1, [SW_ALU1]>],
    954                               [8, 1, 1]>,
    955   //
    956   // Quad-register FP Multiply-Accumulate
    957   InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    958                                InstrStage<1, [SW_ALU1]>],
    959                               [8, 1, 1]>,
    960   //
    961   // Double-register Fused FP Multiply-Accumulate
    962   InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    963                                InstrStage<1, [SW_ALU1]>],
    964                               [8, 1, 1]>,
    965   //
    966   // Quad-register Fused FP Multiply-Accumulate
    967   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    968                                InstrStage<1, [SW_ALU1]>],
    969                               [8, 1, 1]>,
    970   //
    971   // Double-register Reciprocal Step
    972   InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    973                                InstrStage<1, [SW_ALU1]>],
    974                               [8, 1, 1]>,
    975   //
    976   // Quad-register Reciprocal Step
    977   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
    978                                InstrStage<1, [SW_ALU1]>],
    979                               [8, 1, 1]>,
    980   //
    981   // Double-register Permute
    982   // FIXME: The latencies are unclear from the documentation.
    983   InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
    984                                InstrStage<1, [SW_DIS1], 0>,
    985                                InstrStage<1, [SW_DIS2], 0>,
    986                                InstrStage<1, [SW_ALU1], 2>,
    987                                InstrStage<1, [SW_ALU1], 2>,
    988                                InstrStage<1, [SW_ALU1]>],
    989                               [3, 4, 3, 4]>,
    990   //
    991   // Quad-register Permute
    992   // FIXME: The latencies are unclear from the documentation.
    993   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
    994                                InstrStage<1, [SW_DIS1], 0>,
    995                                InstrStage<1, [SW_DIS2], 0>,
    996                                InstrStage<1, [SW_ALU1], 2>,
    997                                InstrStage<1, [SW_ALU1], 2>,
    998                                InstrStage<1, [SW_ALU1]>],
    999                               [3, 4, 3, 4]>,
   1000   //
   1001   // Quad-register Permute (3 cycle issue on A9)
   1002   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
   1003                                InstrStage<1, [SW_DIS1], 0>,
   1004                                InstrStage<1, [SW_DIS2], 0>,
   1005                                InstrStage<1, [SW_ALU1], 2>,
   1006                                InstrStage<1, [SW_ALU1], 2>,
   1007                                InstrStage<1, [SW_ALU1]>],
   1008                               [3, 4, 3, 4]>,
   1009 
   1010   //
   1011   // Double-register VEXT
   1012   InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1013                                InstrStage<1, [SW_ALU1]>],
   1014                               [2, 1, 1]>,
   1015   //
   1016   // Quad-register VEXT
   1017   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1018                                InstrStage<1, [SW_ALU1]>],
   1019                               [2, 1, 1]>,
   1020   //
   1021   // VTB
   1022   InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1023                                InstrStage<1, [SW_ALU1]>],
   1024                               [2, 1, 1]>,
   1025   InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
   1026                                InstrStage<1, [SW_DIS1], 0>,
   1027                                InstrStage<1, [SW_ALU1], 2>,
   1028                                InstrStage<1, [SW_ALU1]>],
   1029                               [4, 1, 3, 3]>,
   1030   InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
   1031                                InstrStage<1, [SW_DIS1], 0>,
   1032                                InstrStage<1, [SW_DIS2], 0>,
   1033                                InstrStage<1, [SW_ALU1], 2>,
   1034                                InstrStage<1, [SW_ALU1], 2>,
   1035                                InstrStage<1, [SW_ALU1]>],
   1036                               [6, 1, 3, 5, 5]>,
   1037   InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
   1038                                InstrStage<1, [SW_DIS1], 0>,
   1039                                InstrStage<1, [SW_DIS2], 0>,
   1040                                InstrStage<1, [SW_ALU1], 2>,
   1041                                InstrStage<1, [SW_ALU1], 2>,
   1042                                InstrStage<1, [SW_ALU1], 2>,
   1043                                InstrStage<1, [SW_ALU1]>],
   1044                               [8, 1, 3, 5, 7, 7]>,
   1045   //
   1046   // VTBX
   1047   InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
   1048                                InstrStage<1, [SW_ALU1]>],
   1049                               [2, 1, 1]>,
   1050   InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
   1051                                InstrStage<1, [SW_DIS1], 0>,
   1052                                InstrStage<1, [SW_ALU1], 2>,
   1053                                InstrStage<1, [SW_ALU1]>],
   1054                               [4, 1, 3, 3]>,
   1055   InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
   1056                                InstrStage<1, [SW_DIS1], 0>,
   1057                                InstrStage<1, [SW_DIS2], 0>,
   1058                                InstrStage<1, [SW_ALU1], 2>,
   1059                                InstrStage<1, [SW_ALU1], 2>,
   1060                                InstrStage<1, [SW_ALU1]>],
   1061                               [6, 1, 3, 5, 5]>,
   1062   InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
   1063                                InstrStage<1, [SW_DIS1], 0>,
   1064                                InstrStage<1, [SW_DIS2], 0>,
   1065                                InstrStage<1, [SW_ALU1], 2>,
   1066                                InstrStage<1, [SW_ALU1], 2>,
   1067                                InstrStage<1, [SW_ALU1], 2>,
   1068                                InstrStage<1, [SW_ALU1]>],
   1069                               [8, 1, 3, 5, 7, 7]>
   1070 ]>;
   1071 
   1072 // ===---------------------------------------------------------------------===//
   1073 // The following definitions describe the simple machine model which
   1074 // will replace itineraries.
   1075 
   1076 // Swift machine model for scheduling and other instruction cost heuristics.
   1077 def SwiftModel : SchedMachineModel {
   1078   let Itineraries = SwiftItineraries; // Legacy itinerary data.
   1079
   1080   let MispredictPenalty = 14; // Cost of mispredicting a branch direction.
   1081   let LoadLatency = 3;
   1082   let MicroOpBufferSize = 45; // Derived from the NEON renamed registers.
   1083   let IssueWidth = 3; // Up to 3 micro-ops are dispatched each cycle.
   1084 }
   1085 
   1086 // Swift predicates: IsFastImmShiftSwiftPred tests TII->isSwiftFastImmShift(MI).
   1087 def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
   1088 
   1089 // Swift resource mapping.
   1090 let SchedModel = SwiftModel in {
   1091   // Processor resources. SwiftUnitP0 and SwiftUnitP1 name SwiftUnitP01 as Super.
   1092   def SwiftUnitP01 : ProcResource<2>; // ALU unit (two units).
   1093   def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Multiply unit.
   1094   def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Branch unit.
   1095   def SwiftUnitP2 : ProcResource<1>; // Load/store unit.
   1096   def SwiftUnitDiv : ProcResource<1>; // Divide unit, judging by name -- TODO confirm.
   1097 
   1098   // Generic resource requirements. Each name encodes the unit(s) and latency.
   1099   def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
   1100   def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
   1101   def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
   1102   def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
   1103   def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
   1104     let Latency = 4; // Uses both SwiftUnitP0 and SwiftUnitP1.
   1105   }
   1106   def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
   1107     let Latency = 6; // Uses both SwiftUnitP0 and SwiftUnitP1.
   1108   }
   1109   def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
   1110   def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
   1111   def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
   1112   def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
   1113   def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
   1114   def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
   1115   def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>; // Repeated 2 times.
   1116   def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>; // Repeated 3 times.
   1117   def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
   1118   def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
   1119                                                       SwiftUnitP01]> {
   1120     let NumMicroOps = 2; // Two micro-ops; SwiftUnitP01 is listed twice above.
   1121     let Latency = 3;
   1122   }
   1123   def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
   1124     let ResourceCycles = [3]; // Cycle count for the single listed resource.
   1125     let NumMicroOps = 3;
   1126     let Latency = 3;
   1127   }
   1128   // Plain load without writeback; uses the load/store unit (SwiftUnitP2).
   1129   def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
   1130     let Latency = 3;
   1131   }
   1132   def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
   1133     let Latency = 4;
   1134   }
   1135   // A store does not write to a register, so its write latency is 0.
   1136   def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
   1137     let Latency = 0;
   1138   }
   1139   foreach Num = 1-4 in { // Defines SwiftWrite1xP2 through SwiftWrite4xP2.
   1140     def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
   1141   }
   1142   def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
   1143                                                     SwiftWriteP01OneCycle,
   1144                                                     SwiftWriteP2ThreeCycle]>; // Two P01 writes, then a plain load.
   1145   // 4.2.4 Arithmetic and Logical.
   1146   // ALU operation, register-shifted-by-immediate variant.
   1147   def SwiftWriteALUsi : SchedWriteVariant<[
   1148     // Fast immediate shifts: lsl #2, lsl #1, or lsr #1.
   1149     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
   1150     SchedVar<NoSchedPred,             [WriteALU]>
   1151   ]>; // NOTE(review): fast shifts get the two-cycle write, others plain WriteALU -- confirm intended.
   1152   def SwiftWriteALUsr : SchedWriteVariant<[ // Predicated: two micro-ops; otherwise two cycles on P01.
   1153     SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
   1154     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
   1155   ]>;
   1156   def SwiftWriteALUSsr : SchedWriteVariant<[ // Predicated: three micro-ops on P0; otherwise two cycles on P01.
   1157     SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
   1158     SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
   1159   ]>;
   1160   def SwiftReadAdvanceALUsr : SchedReadVariant<[ // Read advance of 2 only when predicated.
   1161     SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
   1162     SchedVar<NoSchedPred,      [NoReadAdvance]>
   1163   ]>;
   1164   // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
   1165   // AND,BIC,EOR,ORN,ORR
   1166   // CLZ,RBIT,REV,REV16,REVSH,PKH
   1167   def : WriteRes<WriteALU, [SwiftUnitP01]>;
   1168   def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
   1169   def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
   1170   def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
   1171   def : ReadAdvance<ReadALU, 0>;
   1172   def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
   1173 
   1174 
   1175   def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
            // Fast immediate shifts take the one-cycle write, all others two cycles.
   1176     SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
   1177     SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>
   1178   ]>;
   1179 
   1180   // 4.2.5 Integer comparison
          // Plain compare uses SwiftUnitP01; the shifted-immediate form picks its
          // cost by shift kind and the shifted-register form is always two cycles.
   1181   def : WriteRes<WriteCMP, [SwiftUnitP01]>;
   1182   def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
   1183   def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
   1184 
   1185   // 4.2.6 Shift, Move
   1186   // Shift
   1187   //  ASR,LSL,ROR,RRX
   1188   //  MOV(register-shiftedregister)  MVN(register-shiftedregister)
   1189   // Move
   1190   //  MOV,MVN
   1191   //  MOVT
   1192   // Sign/Zero extension
          // ARM and Thumb2 extend instructions map to SwiftWriteP01OneCycle.
   1193   def : InstRW<[SwiftWriteP01OneCycle],
   1194                (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
   1195                           "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
   1196                           "t2UXTB16")>;
   1197   // Pseudo instructions: modelled as 2 or 3 repetitions of
          // SwiftWriteP01OneCycle, or 2 repetitions followed by a load.
   1198   def : InstRW<[SwiftWriteP01OneCycle2x],
   1199         (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
   1200                    "t2MOVi32imm", "t2MOV_ga_dyn")>;
   1201   def : InstRW<[SwiftWriteP01OneCycle3x],
   1202         (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
   1203   def : InstRW<[SwiftWriteP01OneCycle2x_load],
   1204         (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
   1205 
   1206   def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
          // ("Cyle" is the spelling used by this identifier; it is kept so that
          // existing references still resolve.)
   1207 
          // Predicated: SwiftWriteP0OneCycle twice; unpredicated: once.
   1208   def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
   1209     SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
   1210     SchedVar<NoSchedPred,     [ SwiftWriteP0OneCycle ]>
   1211   ]>;
   1212 
   1213   // 4.2.7 Select
   1214   // SEL
   1215   def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
   1216 
   1217   // 4.2.8 Bitfield
   1218   // BFI,BFC, SBFX,UBFX
          // All bitfield forms (ARM, Thumb, Thumb2 prefixes) use the two-cycle P01 write.
   1219   def : InstRW< [SwiftWriteP01TwoCycle],
   1220         (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
   1221         "(t|t2)UBFX", "(t|t2)SBFX")>;
   1222 
   1223   // 4.2.9 Saturating arithmetic
          // Same two-cycle P01 write for the saturating add/sub and SSAT/USAT families.
   1224   def : InstRW< [SwiftWriteP01TwoCycle],
   1225         (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
   1226         "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
   1227         "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
   1228         "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
   1229         "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
   1230         "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
   1231 
   1232   // 4.2.10 Parallel Arithmetic
          // NOTE(review): architecturally the SADD8/UADD8-style operations update
          // the APSR.GE bits, while the halving (SH*/UH*) and extend-and-add
          // (SXTA*/UXTA*) operations do not, so the two labels below look swapped.
          // Confirm against the Swift optimisation guide before relying on them.
   1233   // Not flag setting.
   1234   def : InstRW< [SwiftWriteALUsr],
   1235         (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
   1236         "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
   1237         "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
   1238         "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
   1239   // Flag setting.
   1240   def : InstRW< [SwiftWriteP01TwoCycle],
   1241        (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
   1242        "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
   1243        "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
   1244        "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
   1245        "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
   1246        "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
   1247 
   1248   // 4.2.11 Sum of Absolute Difference
   1249   def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
          // Accumulating form: same write; the third read operand gets a read
          // advance of 2 cycles.
   1250   def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
   1251         (instregex "USADA8")>;
   1252 
   1253   // 4.2.12 Integer Multiply (32-bit result)
   1254   // Two sources: every listed multiply maps to SwiftWriteP0FourCycle.
   1255   def : InstRW< [SwiftWriteP0FourCycle],
   1256         (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
   1257         "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
   1258         "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
   1259         "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
   1260 
   1261   def SwiftWriteP0P01FiveCycleTwoUops :
   1262       SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]>  {
            // Result is available after five cycles.
   1263     let Latency = 5;
            // One micro-op per listed unit (P0, then P0/P1). Without this the
            // default of a single micro-op applies, contradicting both the name
            // and the sibling SwiftWriteP01ThreeCycleTwoUops.
            let NumMicroOps = 2;
   1264   }
   1265 
   1266   def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
            // Predicated: five-cycle P0 + P01 write; otherwise SwiftWriteP0FourCycle.
   1267     SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
   1268     SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]>
   1269   ]>;
   1270 
          // Read variant for the accumulator operand: a read advance of 4 cycles
          // when predicated, plain ReadALU otherwise.
   1271   def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
   1272      SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
   1273      SchedVar<NoSchedPred,      [ReadALU]>
   1274   ]>;
   1275 
   1276   // Multiply accumulate, three sources
          // The first two reads are plain ReadALU; the third uses the variant above.
   1277   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
   1278                  SwiftReadAdvanceFourCyclesPred],
   1279         (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
   1280         "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
   1281         "t2SMMLSR")>;
   1282 
   1283   // 4.2.13 Integer Multiply (32-bit result, Q flag)
   1284   def : InstRW< [SwiftWriteP0FourCycle],
   1285         (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
          // Accumulating forms: predicate-dependent write plus explicit read list.
   1286   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
   1287                  SwiftReadAdvanceFourCyclesPred],
   1288         (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
   1289         "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
   1290         "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
          // SMLAD family: same write, but no per-operand reads are listed here.
   1291   def : InstRW< [SwiftPredP0P01FourFiveCycle],
   1292         (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
   1293 
   1294   def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
            // Three micro-ops: SwiftUnitP0 is held for 2 cycles, SwiftUnitP01 for 1.
   1295     let Latency = 5;
   1296     let NumMicroOps = 3;
   1297     let ResourceCycles = [2, 1];
   1298   }
          // Latency-only writes: no resources and zero micro-ops, used to give an
          // additional result operand its own latency (1, 5 and 6 cycles).
   1299   def SwiftWrite1Cycle : SchedWriteRes<[]> {
   1300     let Latency = 1;
   1301     let NumMicroOps = 0;
   1302   }
   1303   def SwiftWrite5Cycle : SchedWriteRes<[]> {
   1304     let Latency = 5;
   1305     let NumMicroOps = 0;
   1306   }
   1307   def SwiftWrite6Cycle : SchedWriteRes<[]> {
   1308     let Latency = 6;
   1309     let NumMicroOps = 0;
   1310   }
   1311 
   1312   // 4.2.14 Integer Multiply, Long
          // First write carries the resources; the second is a latency-only 5-cycle
          // write. The trailing '$' restricts the match to the exact opcode names.
   1313   def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
   1314         (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
   1315 
   1316   def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
            // Five micro-ops: SwiftUnitP0 for 2 cycles, SwiftUnitP01 for 3 cycles.
            // NOTE(review): the name says "FiveCycle" but Latency is 7 - confirm
            // which is intended.
   1317     let Latency = 7;
   1318     let NumMicroOps = 5;
   1319     let ResourceCycles = [2, 3];
   1320   }
   1321 
   1322   // 4.2.15 Integer Multiply Accumulate, Long
   1323   // 4.2.16 Integer Multiply Accumulate, Dual
   1324   // 4.2.17 Integer Multiply Accumulate Accumulate, Long
   1325   // We are being a bit inaccurate here.
          // Writes: a latency-only 5-cycle write, then Swift2P03P01FiveCycle which
          // carries the resources. Reads: two plain ReadALU operands followed by
          // read advances of 4 and 3 cycles.
          // The halfword forms are spelled SMLALBB / t2SMLALBB; the previous
          // "MLALBB" / "t2MLALBB" entries did not name any instruction.
   1326   def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
   1327                  SchedReadAdvance<4>, SchedReadAdvance<3>],
   1328         (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
   1329         "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
   1330         "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB", "t2SMLALBT",
   1331         "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
   1332         "t2UMAAL")>;
   1333 
   1334   def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
            // One micro-op: SwiftUnitP0 for 1 cycle, SwiftUnitDiv for all 14 cycles.
   1335     let NumMicroOps = 1;
   1336     let Latency = 14;
   1337     let ResourceCycles = [1, 14];
   1338   }
   1339   // 4.2.18 Integer Divide
   1340   def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
          // The divide opcodes themselves are mapped to SwiftDiv explicitly.
   1341   def : InstRW <[SwiftDiv],
   1342         (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
   1343 
   1344   // 4.2.19 Integer Load Single Element
   1345   // 4.2.20 Integer Load Signextended
          // Write resources for loads; each listed unit corresponds to one micro-op.
   1346   def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
   1347     let Latency = 3;
   1348     let NumMicroOps = 2;
   1349   }
          // ("Cyle" is the spelling used by this identifier.)
   1350   def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
   1351     let Latency = 4;
   1352     let NumMicroOps = 2;
   1353   }
   1354   def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
   1355                                                    SwiftUnitP01]> {
   1356     let Latency = 4;
   1357     let NumMicroOps = 3;
   1358   }
   1359   def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
   1360     let Latency = 3;
   1361     let NumMicroOps = 2;
   1362   }
   1363   def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
   1364                                                    SwiftUnitP01]> {
   1365     let Latency = 3;
   1366     let NumMicroOps = 3;
   1367   }
          // Latency-only writes (no resources, zero micro-ops): 1 cycle, used below
          // as the extra write of writeback forms, and 4 cycles, used as the second
          // write of dual loads.
   1368   def SwiftWrBackOne : SchedWriteRes<[]> {
   1369     let Latency = 1;
   1370     let NumMicroOps = 0;
   1371   }
   1372   def SwiftWriteLdFour : SchedWriteRes<[]> {
   1373     let Latency = 4;
   1374     let NumMicroOps = 0;
   1375   }
   1376    // Not accurate.
          // Plain loads: a single three-cycle P2 write.
   1377   def : InstRW<[SwiftWriteP2ThreeCycle],
   1378         (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
   1379         "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
   1380         "tLDR(r|i|spi|pci|pciASM)")>;
   1381   def : InstRW<[SwiftWriteP2ThreeCycle],
   1382         (instregex "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
          // Sign-extending loads: P2 plus a P01 micro-op, four cycles.
   1383   def : InstRW<[SwiftWriteP2P01FourCyle],
   1384         (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
   1385         "t2LDRpci_pic", "tLDRS(B|H)")>;
          // Pre/post-indexed and T-variant loads: second write is the one-cycle
          // latency-only SwiftWrBackOne.
   1386   def : InstRW<[SwiftWriteP2P01ThreeCycle,  SwiftWrBackOne],
   1387         (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
   1388         "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
   1389         "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
          // Sign-extending counterparts of the previous group.
   1390   def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
   1391         (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
   1392         "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
   1393 
   1394   // 4.2.21 Integer Dual Load
   1395   // Not accurate.
          // The first write carries the resources (latency 3); the second is the
          // latency-only four-cycle write.
   1396   def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
   1397         (instregex "t2LDRDi8", "LDRD$")>;
          // Pre/post-indexed forms add a P01 micro-op and the SwiftWrBackOne write.
   1398   def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
   1399         (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
   1400 
   1401   // 4.2.22 Integer Load, Multiple
   1402   // NumReg = 1 .. 16
          // For every latency 3..25 define SwiftWriteLM<Lat>Cy, which occupies
          // SwiftUnitP2, and SwiftWriteLM<Lat>CyNo, a latency-only twin with no
          // resources and zero micro-ops.
   1403   foreach Lat = 3-25 in {
   1404     def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
   1405       let Latency = Lat;
   1406     }
   1407     def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
   1408       let Latency = Lat;
   1409       let NumMicroOps = 0;
   1410     }
   1411   }
   1412   // Predicate: SwiftLMAddr<N>Pred holds when getNumLDMAddresses(MI) equals N.
   1413   foreach NumAddr = 1-16 in {
   1414     def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
   1415   }
          // Writes paired with SwiftWriteLM in the LDM InstRW entries: the
          // no-writeback form is a zero-latency P01 write, the writeback form
          // lists SwiftUnitP01 twice.
   1416   def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
   1417   def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
   1418   def SwiftWriteLM : SchedWriteVariant<[
            // For N = 2..16 addresses the variant expands to N P2 writes whose
            // latencies run 3, 4, ..., N+2.
            // NOTE(review): there is no SwiftLMAddr1Pred entry, so a one-address
            // LDM falls through to the NoSchedPred default - confirm intended.
   1419     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
   1420     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1421                                 SwiftWriteLM5Cy]>,
   1422     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1423                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
   1424     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1425                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1426                                 SwiftWriteLM7Cy]>,
   1427     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1428                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1429                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
   1430     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1431                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1432                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1433                                 SwiftWriteLM9Cy]>,
   1434     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1435                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1436                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1437                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
   1438     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1439                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1440                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1441                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1442                                 SwiftWriteLM11Cy]>,
   1443     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1444                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1445                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1446                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1447                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
   1448     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1449                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1450                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1451                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1452                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1453                                 SwiftWriteLM13Cy]>,
   1454     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1455                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1456                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1457                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1458                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1459                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
   1460     SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1461                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1462                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1463                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1464                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1465                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1466                                 SwiftWriteLM15Cy]>,
   1467     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1468                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1469                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1470                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1471                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1472                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1473                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
   1474     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1475                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1476                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1477                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1478                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1479                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1480                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
   1481                                 SwiftWriteLM17Cy]>,
   1482     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1483                                 SwiftWriteLM5Cy, SwiftWriteLM6Cy,
   1484                                 SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1485                                 SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1486                                 SwiftWriteLM11Cy, SwiftWriteLM12Cy,
   1487                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1488                                 SwiftWriteLM15Cy, SwiftWriteLM16Cy,
   1489                                 SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
   1490     // Unknown number of registers, just use resources for two registers.
            // The remaining fourteen entries are the latency-only "CyNo" writes.
   1491     SchedVar<NoSchedPred,      [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
   1492                                 SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
   1493                                 SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
   1494                                 SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
   1495                                 SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
   1496                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
   1497                                 SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
   1498                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
   1499 
   1500   ]> { let Variadic=1; }
   1501 
   1502   def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
   1503         (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
   1504         "(t|sys)LDM(IA|DA|DB|IB)$")>;
          // Writeback (_UPD) forms list the address write first, then the variadic
          // SwiftWriteLM.
   1505   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
   1506         (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
   1507         "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
          // Return / pop forms append SwiftWriteP1TwoCycle to the writeback list.
   1508   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
   1509         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
   1510   // 4.2.23 Integer Store, Single Element
          // Plain stores: the zero-latency P2 write only.
   1511   def : InstRW<[SwiftWriteP2],
   1512         (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
   1513         "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
   1514 
          // Indexed / T-variant stores: SwiftWriteP01OneCycle is listed first,
          // followed by the P2 store write.
   1515   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
   1516         (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
   1517         "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
   1518         "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
   1519         "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
   1520 
   1521   // 4.2.24 Integer Store, Dual
          // Two P2 store writes plus one SwiftWriteP01OneCycle.
   1522   def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
   1523         (instregex "STRD$", "t2STRDi8")>;
          // Pre/post-indexed forms put an additional SwiftWriteP01OneCycle first.
   1524   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
   1525                 SwiftWriteP01OneCycle],
   1526         (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
   1527 
   1528   // 4.2.25 Integer Store, Multiple
          // One store step: two micro-ops (P2 and P01) with zero result latency.
   1529   def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
   1530     let Latency = 0;
   1531     let NumMicroOps = 2;
   1532   }
          // SwiftWriteSTM<N>: the store step repeated N times, N = 1..16.
   1533   foreach NumAddr = 1-16 in {
   1534      def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
   1535   }
          // Pick the sequence matching the address count reported by the
          // SwiftLMAddr<N>Pred predicates.
   1536   def SwiftWriteSTM : SchedWriteVariant<[
   1537     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
   1538     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
   1539     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
   1540     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
   1541     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
   1542     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
   1543     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
   1544     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
   1545     SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
   1546     SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
   1547     SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
   1548     SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
   1549     SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
   1550     SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
   1551     SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
   1552     // Unknown number of registers, just use resources for two registers.
   1553     SchedVar<NoSchedPred,      [SwiftWriteSTM2]>
   1554   ]>;
   1555   def : InstRW<[SwiftWriteSTM],
   1556         (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
          // Writeback and push forms list SwiftWriteP01OneCycle before the store
          // sequence.
   1557   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
   1558         (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
   1559         "PUSH", "tPUSH")>;
   1560 
   1561   // 4.2.26 Branch
          // Branches use SwiftUnitP1; only the linking form has a non-zero latency,
          // and the table branch additionally uses SwiftUnitP2.
   1562   def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
   1563   def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
   1564   def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
   1565 
   1566   // 4.2.27 Not issued
          // No resources, zero latency and zero micro-ops.
   1567   def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
   1568   def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
   1569 
   1570   // 4.2.28 Advanced SIMD, Integer, 2 cycle
          // Arithmetic, logical, shift and count operations: SwiftWriteP0TwoCycle.
   1571   def : InstRW<[SwiftWriteP0TwoCycle],
   1572         (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
   1573                    "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
   1574                    "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
   1575                    "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
   1576                    "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
   1577 
          // Extract and reverse operations: SwiftWriteP1TwoCycle.
   1578   def : InstRW<[SwiftWriteP1TwoCycle],
   1579         (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
   1580 
   1581   // 4.2.29 Advanced SIMD, Integer, 4 cycle
   1582   // 4.2.30 Advanced SIMD, Integer with Accumulate
          // Both sections share SwiftWriteP0FourCycle.
   1583   def : InstRW<[SwiftWriteP0FourCycle],
   1584         (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
   1585         "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
   1586         "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
   1587         "VQSUB")>;
          // Reciprocal and reciprocal-square-root estimates: SwiftWriteP1FourCycle.
   1588   def : InstRW<[SwiftWriteP1FourCycle],
   1589         (instregex "VRECPE", "VRSQRTE")>;
   1590 
   1591   // 4.2.31 Advanced SIMD, Add and Shift with Narrow
   1592   def : InstRW<[SwiftWriteP0P1FourCycle],
   1593         (instregex "VADDHN", "VSUBHN", "VSHRN")>;
          // Rounding and saturating narrowing forms use the six-cycle write.
   1594   def : InstRW<[SwiftWriteP0P1SixCycle],
   1595         (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
   1596                    "VQRSHRN", "VQRSHRUN")>;
   1597 
   1598   // 4.2.32 Advanced SIMD, Vector Table Lookup
          // SwiftWrite<N>xP1TwoCycle: SwiftWriteP1TwoCycle repeated N times, N = 1..4.
   1599   foreach Num = 1-4 in {
   1600     def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
   1601   }
          // The digit after VTBL/VTBX in the opcode name selects the repetition count.
   1602   def : InstRW<[SwiftWrite1xP1TwoCycle],
   1603         (instregex "VTB(L|X)1")>;
   1604   def : InstRW<[SwiftWrite2xP1TwoCycle],
   1605         (instregex "VTB(L|X)2")>;
   1606   def : InstRW<[SwiftWrite3xP1TwoCycle],
   1607         (instregex "VTB(L|X)3")>;
   1608   def : InstRW<[SwiftWrite4xP1TwoCycle],
   1609         (instregex "VTB(L|X)4")>;
   1610 
   1611   // 4.2.33 Advanced SIMD, Transpose
          // Two four-cycle P1 writes, a third write marked resource-only, and a
          // read advance of 2 cycles.
   1612   def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
   1613                 SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
   1614         (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
   1615 
   1616   // 4.2.34 Advanced SIMD and VFP, Floating Point
          // Abs/neg use the two-cycle P0 write; compares and single-precision /
          // vector add-style operations use the four-cycle P0 write.
   1617   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
   1618   def : InstRW<[SwiftWriteP0FourCycle],
   1619         (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
   1620   def : InstRW<[SwiftWriteP0FourCycle],
   1621         (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
   1622                    "VPMIN")>;
          // Double-precision add/sub: six-cycle P0 write.
   1623   def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
          // Reciprocal step operations: eight-cycle P1 write.
   1624   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
   1625 
   1626   // 4.2.35 Advanced SIMD and VFP, Multiply
          // All multiplies are modelled on SwiftUnitP1 writes of varying latency.
   1627   def : InstRW<[SwiftWriteP1FourCycle],
   1628         (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
   1629                    "VMULL", "VQDMULL")>;
   1630   def : InstRW<[SwiftWriteP1SixCycle],
   1631         (instregex "VMULD", "VNMULD")>;
          // Multiply-accumulate and scalar fused forms.
   1632   def : InstRW<[SwiftWriteP1FourCycle],
   1633         (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
   1634         "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
          // Vector fused forms: 8 cycles for the "fd" opcodes, 12 for "fq".
   1635   def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
   1636   def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
   1637 
   1638   // 4.2.36 Advanced SIMD and VFP, Convert
   1639   def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
   1640   // Fixed-point conversions: SwiftUnitP1, four-cycle latency.
   1641   def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
   1642 
   1643   // 4.2.37 Advanced SIMD and VFP, Move
          // Moves between FP/SIMD registers: two-cycle P0 write.
   1644   def : InstRW<[SwiftWriteP0TwoCycle],
   1645         (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
   1646                    "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
   1647                    "FCONST(D|S)")>;
   1648   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
          // Saturating narrow: a four-cycle P0 write followed by a two-cycle P1 write.
   1649   def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
   1650         (instregex "VQMOVN")>;
   1651   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
          // The entries below that start with a P2 write match opcodes whose names
          // indicate a core-register source or destination (presumably a
          // cross-file transfer - confirm).
   1652   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
   1653         (instregex "VDUP(8|16|32)")>;
   1654   def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
   1655   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
   1656         (instregex "VMOVSR$", "VSETLN")>;
   1657   def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
   1658         (instregex "VMOVRR(D|S)$")>;
   1659   def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
          // Two results: the second sequence is the first one prefixed by the
          // one-cycle latency-only write.
   1660   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
   1661                 WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
   1662                                SwiftWriteP1TwoCycle]>],
   1663                 (instregex "VMOVSRR$")>;
          // Lane extracts: P1 then P2; the signed form appends SwiftWriteP01OneCycle.
   1664   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
   1665         (instregex "VGETLN(u|i)")>;
   1666   def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
   1667                                SwiftWriteP01OneCycle]>],
   1668         (instregex "VGETLNs")>;
   1669 
   1670   // 4.2.38 Advanced SIMD and VFP, Move FPSCR
   1671   // Serializing instructions.
          // Each write below holds one unit (P0, P1 or P2) busy for 15 cycles with
          // a matching 15-cycle latency; VMRS and VMSR list all three.
   1672   def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
   1673     let Latency = 15;
   1674     let ResourceCycles = [15];
   1675   }
   1676   def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
   1677     let Latency = 15;
   1678     let ResourceCycles = [15];
   1679   }
   1680   def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
   1681     let Latency = 15;
   1682     let ResourceCycles = [15];
   1683   }
   1684   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
   1685         (instregex "VMRS")>;
   1686   def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
   1687         (instregex "VMSR")>;
   1688   // Not serializing.
   1689   def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
   1690 
   1691   // 4.2.39 Advanced SIMD and VFP, Load Single Element
   1692   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
   1693 
   1694   // 4.2.40 Advanced SIMD and VFP, Store Single Element
          // NOTE(review): stores reuse the four-cycle load write SwiftWriteLM4Cy,
          // whereas the integer stores use the zero-latency SwiftWriteP2 - confirm
          // this is intended.
   1695   def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;
   1696 
   1697   // 4.2.41 Advanced SIMD and VFP, Load Multiple
   1698   // 4.2.42 Advanced SIMD and VFP, Store Multiple
   1699 
   1700   // Resource requirement for permuting, just reserves the resources.
          // SwiftVLDMPerm<N> (N = 1..28): zero latency, N micro-ops, and
          // SwiftUnitP1 held for N cycles.
   1701   foreach Num = 1-28 in {
   1702     def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
   1703       let Latency = 0;
   1704       let NumMicroOps = Num;
   1705       let ResourceCycles = [Num];
   1706     }
   1707   }
   1708 
   1709   // Pre RA pseudos - load/store to a Q register as a D register pair.
          // Load and store pseudo share the same four-cycle write.
   1710   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
   1711 
   1712   // Post RA is not modelled accurately. We assume that a register use of width
   1713   // 64 bits maps to a D register and of width 128 bits to a Q register. Not
   1714   // all of the different kinds are accurately represented.
   1715   def SwiftWriteVLDM : SchedWriteVariant<[
   1716     // Load of one S register.
   1717     SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
   1718     // Load of one D register.
   1719     SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
   1720     // Load of 3 S registers.
   1721     SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1722                                 SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
   1723                                 SwiftVLDMPerm3]>,
   1724     // Load of a Q register (not necessarily true). We should not be mapping to
   1725     // 4 S registers, either.
   1726     SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
   1727                                 SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
   1728     // Load of 5 S registers.
   1729     SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1730                                 SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
   1731                                 SwiftWriteLM17CyNo,  SwiftWriteP01OneCycle,
   1732                                 SwiftVLDMPerm5]>,
   1733     // Load of 3 D registers. (Must also be able to handle an S register list,
   1734     // though not accurately.)
   1735     SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1736                                 SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
   1737                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1738                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
   1739     // Load of 7 S registers.
   1740     SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1741                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1742                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1743                                 SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
   1744                                 SwiftVLDMPerm7]>,
   1745     // Load of two Q registers.
   1746     SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1747                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
   1748                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1749                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1750                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>,
   1751     // Load of 9 S registers.
   1752     SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1753                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1754                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1755                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1756                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1757                                 SwiftVLDMPerm9]>,
   1758     // Load of 5 D registers.
   1759     SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1760                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
   1761                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1762                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1763                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
   1764                                 SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
   1765     // Inaccurate: reuse description from 9 S registers.
   1766     SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1767                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1768                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1769                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1770                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1771                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1772                                 SwiftVLDMPerm9]>,
   1773     // Load of three Q registers.
   1774     SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1775                                 SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   1776                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1777                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1778                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1779                                 SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
   1780                                 SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
   1781     // Inaccurate: reuse description from 9 S registers.
   1782     SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1783                                 SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
   1784                                 SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
   1785                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1786                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1787                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1788                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1789                                 SwiftVLDMPerm9]>,
   1790     // Load of 7 D registers (inaccurate).
   1791     SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1792                                 SwiftWriteLM10Cy, SwiftWriteLM14Cy,
   1793                                 SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
   1794                                 SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
   1795                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
   1796                                 SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
   1797                                 SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
   1798     SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
   1799                                 SwiftWriteLM13Cy, SwiftWriteLM14Cy,
   1800                                 SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
   1801                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1802                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1803                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1804                                 SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
   1805                                 SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
   1806                                 SwiftVLDMPerm9]>,
   1807     // Load of 4 Q registers.
   1808     SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
   1809                                 SwiftWriteLM11Cy, SwiftWriteLM14Cy,
   1810                                 SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
   1811                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1812                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1813                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1814                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1815                                 SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
   1816                                 SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
   1817     // Unknown number of registers, just use resources for two registers.
   1818     SchedVar<NoSchedPred,      [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
   1819                                 SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
   1820                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1821                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1822                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1823                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1824                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1825                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1826                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1827                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1828                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1829                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1830                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1831                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1832                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1833                                 SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
   1834                                 SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>
   1835   ]> { let Variadic = 1; }
   1836 
   1837   def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
   1838 
   1839   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
   1840         (instregex "VLDM[SD](IA|DB)_UPD$")>;
   1841 
   1842   def SwiftWriteVSTM : SchedWriteVariant<[
   1843     // One S register.
   1844     SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
   1845     // One D register.
   1846     SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
   1847     // Three S registers.
   1848     SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
   1849     // Assume one Q register.
   1850     SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
   1851     SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
   1852     // Assume three D registers.
   1853     SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
   1854     SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
   1855     // Assume two Q registers.
   1856     SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
   1857     SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
   1858     // Assume 5 D registers.
   1859     SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
   1860     SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
   1861     // Assume three Q registers.
   1862     SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
   1863     SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
   1864     // Assume 7 D registers.
   1865     SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
   1866     SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
   1867     // Assume four Q registers.
   1868     SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
   1869     // Fallback (NoSchedPred): assume two Q registers.
   1870     SchedVar<NoSchedPred, [SwiftWriteSTM3]>
   1871   ]> { let Variadic = 1; }
   1872 
   1873   def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
   1874 
   1875   def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
   1876         (instregex "VSTM[SD](IA|DB)_UPD")>;
   1877 
   1878   // 4.2.43 Advanced SIMD, Element or Structure Load and Store
   1879   def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
   1880       let Latency = 4;
   1881       let ResourceCycles = [2]; // SwiftUnitP2 is occupied for 2 cycles.
   1882   }
   1883   def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
   1884       let Latency = 4;
   1885       let ResourceCycles = [3]; // SwiftUnitP2 is occupied for 3 cycles.
   1886   }
   1887   foreach Num = 1-2 in { // Defines SwiftExt1xP0 and SwiftExt2xP0.
   1888     def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
   1889       let Latency = 0;
   1890       let NumMicroOps = Num;
   1891       let ResourceCycles = [Num]; // Num cycles on SwiftUnitP0.
   1892     }
   1893   }
   1894   // VLDx
   1895   // Multiple structures.
   1896   // Single element structure loads.
   1897   // We assume aligned.
   1898   // Single/two register.
   1899   def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
   1900   def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
   1901         (instregex "VLD1(d|q)(8|16|32|64)wb")>;
   1902   // Three register.
   1903   def : InstRW<[SwiftWrite3xP2FourCy],
   1904         (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
   1905   def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
   1906         (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
   1907   // Four register.
   1908   def : InstRW<[SwiftWrite2xP2FourCy],
   1909         (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
   1910   def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
   1911         (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
   1912   // Two element structure loads.
   1913   // Two/four register.
   1914   def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
   1915         (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
   1916   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
   1917                 SwiftVLDMPerm2],
   1918         (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
   1919   // Three element structure.
   1920   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
   1921                 SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
   1922         (instregex "VLD3(d|q)(8|16|32)$")>;
   1923   def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
   1924         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
   1925 
   1926   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
   1927                 SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
   1928         (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
   1929   def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
   1930                 SwiftWrite3xP2FourCy],
   1931         (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
   1932   // Four element structure loads.
   1933   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   1934                 SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
   1935                 SwiftWrite3xP2FourCy],
   1936         (instregex "VLD4(d|q)(8|16|32)$")>;
   1937   def : InstRW<[SwiftWriteLM11Cy,  SwiftExt2xP0, SwiftVLDMPerm4,
   1938                 SwiftWrite3xP2FourCy],
   1939         (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
   1940   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   1941                 SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
   1942                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
   1943         (instregex "VLD4(d|q)(8|16|32)_UPD")>;
   1944   def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
   1945                 SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
   1946         (instregex  "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
   1947 
   1948   // Single/all lane loads.
   1949   // One element structure.
   1950   def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
   1951         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
   1952   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
   1953         (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
   1954                   "VLD1LNq(8|16|32)Pseudo_UPD")>;
   1955   // Two element structure.
   1956   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
   1957         (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
   1958                    "VLD2LN(d|q)(8|16|32)Pseudo$")>;
   1959   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
   1960                 SwiftExt1xP0, SwiftVLDMPerm2],
   1961         (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
   1962   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
   1963                 SwiftExt1xP0, SwiftVLDMPerm2],
   1964         (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
   1965   def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
   1966                 SwiftExt1xP0, SwiftVLDMPerm2],
   1967         (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
   1968   // Three element structure.
   1969   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
   1970                 SwiftVLDMPerm3],
   1971         (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
   1972                    "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
   1973   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
   1974                 SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
   1975         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
   1976   def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
   1977                 SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
   1978         (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
   1979   // Four element structure.
   1980   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
   1981                 SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
   1982         (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
   1983                    "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
   1984   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
   1985                 SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
   1986                 SwiftVLDMPerm5],
   1987         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
   1988   def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
   1989                 SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
   1990                 SwiftVLDMPerm5],
   1991         (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
   1992   // VSTx
   1993   // Multiple structures.
   1994   // Single element structure store.
   1995   def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
   1996   def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
   1997   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
   1998         (instregex "VST1d(8|16|32|64)wb")>;
   1999   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
   2000         (instregex "VST1q(8|16|32|64)wb")>;
   2001   def : InstRW<[SwiftWrite3xP2],
   2002         (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
   2003   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
   2004         (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
   2005   def : InstRW<[SwiftWrite4xP2],
   2006         (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
   2007   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
   2008         (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
   2009   // Two element structure store; wb forms lead with SwiftWriteP01OneCycle.
   2010   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
   2011         (instregex "VST2(d|b)(8|16|32)$")>;
   2012   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
   2013         (instregex "VST2(b|d)(8|16|32)wb")>;
   2014   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
   2015         (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
   2016   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
   2017         (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
   2018   // Three element structure store.
   2019   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
   2020         (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
   2021   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
   2022         (instregex "VST3(d|q)(8|16|32)_UPD",
   2023                    "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
   2024   // Four element structure store. NOTE(review): Perm2 vs. Perm4 below?
   2025   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
   2026         (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
   2027   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
   2028         (instregex "VST4(d|q)(8|16|32)_UPD",
   2029                    "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
   2030   // Single/all lane store.
   2031   // One element structure.
   2032   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
   2033         (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
   2034   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
   2035         (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
   2036   // Two element structure.
   2037   def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
   2038         (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
   2039   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
   2040         (instregex "VST2LN(d|q)(8|16|32)_UPD",
   2041                    "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
   2042   // Three element structure.
   2043   def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
   2044         (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
   2045   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
   2046         (instregex "VST3LN(d|q)(8|16|32)_UPD",
   2047                    "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
   2048   // Four element structure.
   2049   def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
   2050         (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
   2051   def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
   2052         (instregex "VST4LN(d|q)(8|16|32)_UPD",
   2053                    "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
   2054 
   2055   // 4.2.44 VFP, Divide and Square Root
   2056   def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
   2057     let NumMicroOps = 1;
   2058     let Latency = 17;
   2059     let ResourceCycles = [1, 15]; // SwiftUnitP0: 1 cycle, SwiftUnitDiv: 15.
   2060   }
   2061   def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
   2062     let NumMicroOps = 1;
   2063     let Latency = 32;
   2064     let ResourceCycles = [1, 30]; // SwiftUnitP0: 1 cycle, SwiftUnitDiv: 30.
   2065   }
   2066   def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
   2067   def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
   2068 
   2069   // Not specified.
   2070   def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
   2071   // Preload.
   2072   def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
   2073     let ResourceCycles = [0];
   2074   }
   2075 
   2076 }
   2077