Home | History | Annotate | Download | only in AArch64
      1 //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the uop and latency details for the machine model for the
     11 // Qualcomm Falkor subtarget.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 // Contains all of the Falkor specific SchedWriteRes types. The approach
     16 // below is to define a generic SchedWriteRes for every combination of
     17 // latency and microOps. The naming convention is to use a prefix, one field
     18 // for latency, and one or more microOp count/type designators.
     19 //   Prefix: FalkorWr
     20 //   MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
     21 //   Latency: #cyc
     22 //
     23 // e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
     24 //      down one Z pipe, six SD pipes, four VX pipes and the total latency is
     25 //      six cycles.
     26 //
     27 // Contains all of the Falkor specific ReadAdvance types for forwarding logic.
     28 //
     29 // Contains all of the Falkor specific WriteVariant types for immediate zero
     30 // and LSLFast.
     31 //===----------------------------------------------------------------------===//
     32 
     33 //===----------------------------------------------------------------------===//
     34 // Define 0 micro-op types
        // Every write in this group has an empty resource list (SchedWriteRes<[]>)
        // and sets NumMicroOps = 0, so the defs differ only in their Latency value.
        // FalkorWr_LdInc_none_2cyc and FalkorWr_StInc_none_2cyc are the writes named
        // by FalkorReadIncLd and FalkorReadIncSt later in this file.
     35 def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> {
     36   let Latency = 2;
     37   let NumMicroOps = 0;
     38 }
     39 def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> {
     40   let Latency = 2;
     41   let NumMicroOps = 0;
     42 }
     43 def FalkorWr_none_3cyc : SchedWriteRes<[]> {
     44   let Latency = 3;
     45   let NumMicroOps = 0;
     46 }
     47 def FalkorWr_none_4cyc : SchedWriteRes<[]> {
     48   let Latency = 4;
     49   let NumMicroOps = 0;
     50 }
     51 
     52 //===----------------------------------------------------------------------===//
     53 // Define 1 micro-op types
        // Each write below lists at most one processor resource and overrides only
        // Latency; NumMicroOps is not set here, so it keeps the SchedWriteRes default.
        // The IMUL32/IMUL64/VMUL32/FMUL32/FMUL64-tagged writes are separate defs so
        // that the SchedReadAdvance records later in this file can name them.
     54 
     55 def FalkorWr_1X_2cyc    : SchedWriteRes<[FalkorUnitX]>   { let Latency = 2; }
        // NOTE(review): the name ends in "2cyc" but Latency is 4. FalkorReadIMA32
        // advances reads of this write by 3 cycles; confirm which of the name or
        // the value is intended before changing either.
     56 def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
     57 def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
     58 def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
     59 def FalkorWr_1Z_0cyc    : SchedWriteRes<[FalkorUnitZ]>   { let Latency = 0; }
     60 def FalkorWr_1ZB_0cyc   : SchedWriteRes<[FalkorUnitZB]>  { let Latency = 0; }
     61 def FalkorWr_1LD_3cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 3; }
     62 def FalkorWr_1LD_4cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 4; }
     63 def FalkorWr_1XYZ_0cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
     64 def FalkorWr_1XYZ_1cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
     65 def FalkorWr_1XYZ_2cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
     66 def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
     67 def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
        // One micro-op that lists no processor resource at all.
     68 def FalkorWr_1none_0cyc : SchedWriteRes<[]>              { let Latency = 0; }
     69 
        // Single micro-op on the VXVY resource, latencies 0 through 6.
     70 def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
     71 def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
     72 def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
     73 def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
     74 def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
     75 def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
     76 def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
     77 def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
     78 def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
     79 def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
     80 
        // Single micro-op on the LD or ST resource.
     81 def FalkorWr_1LD_0cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 0; }
     82 def FalkorWr_1ST_0cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 0; }
     83 def FalkorWr_1ST_3cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 3; }
     84 
        // Single micro-op on the GTOV or VTOG resource.
     85 def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
     86 def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
     87 def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
     88 def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
     89 
     90 //===----------------------------------------------------------------------===//
     91 // Define 2 micro-op types
        // The writes in this first group list FalkorUnitVXVY twice (one entry per
        // micro-op) and set NumMicroOps = 2; they differ only in Latency. The
        // VMUL32/FMUL32/FMUL64-tagged variants repeat the latency of the plain write
        // next to them under a distinct name that the SchedReadAdvance records
        // later in this file refer to.
     92 
     93 def FalkorWr_2VXVY_0cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
     94   let Latency = 0;
     95   let NumMicroOps = 2;
     96 }
     97 def FalkorWr_2VXVY_1cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
     98   let Latency = 1;
     99   let NumMicroOps = 2;
    100 }
    101 def FalkorWr_2VXVY_2cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    102   let Latency = 2;
    103   let NumMicroOps = 2;
    104 }
    105 def FalkorWr_2VXVY_3cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    106   let Latency = 3;
    107   let NumMicroOps = 2;
    108 }
    109 def FalkorWr_2VXVY_4cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    110   let Latency = 4;
    111   let NumMicroOps = 2;
    112 }
    113 def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    114   let Latency = 4;
    115   let NumMicroOps = 2;
    116 }
    117 def FalkorWr_2VXVY_5cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    118   let Latency = 5;
    119   let NumMicroOps = 2;
    120 }
    121 def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    122   let Latency = 5;
    123   let NumMicroOps = 2;
    124 }
    125 def FalkorWr_2VXVY_6cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    126   let Latency = 6;
    127   let NumMicroOps = 2;
    128 }
    129 def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
    130   let Latency = 6;
    131   let NumMicroOps = 2;
    132 }
    133 
        // Two-micro-op writes that involve the LD resource.
    134 def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
    135   let Latency = 4;
    136   let NumMicroOps = 2;
    137 }
    138 def FalkorWr_1XYZ_1LD_4cyc  : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
    139   let Latency = 4;
    140   let NumMicroOps = 2;
    141 }
    142 def FalkorWr_2LD_3cyc   : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
    143   let Latency = 3;
    144   let NumMicroOps = 2;
    145 }
    146 
        // Two-micro-op writes that list FalkorUnitVX and FalkorUnitVY individually
        // (rather than the VXVY resource); the defs differ only in Latency.
    147 def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    148   let Latency = 5;
    149   let NumMicroOps = 2;
    150 }
    151 
    152 def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    153   let Latency = 2;
    154   let NumMicroOps = 2;
    155 }
    156 
    157 def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    158   let Latency = 4;
    159   let NumMicroOps = 2;
    160 }
    161 
    162 def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    163   let Latency = 10;
    164   let NumMicroOps = 2;
    165 }
    166 
    167 def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    168   let Latency = 12;
    169   let NumMicroOps = 2;
    170 }
    171 
    172 def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    173   let Latency = 14;
    174   let NumMicroOps = 2;
    175 }
    176 
    177 def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
    178   let Latency = 21;
    179   let NumMicroOps = 2;
    180 }
    181 
        // Remaining two-micro-op writes: GTOV, XYZ paired with ST/LD, Z/XY, X/Z,
        // LD paired with Z or with a resource-free micro-op, and the store pairs.
    182 def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
    183   let Latency = 2;
    184   let NumMicroOps = 2;
    185 }
    186 
    187 def FalkorWr_2GTOV_1cyc    : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
    188   let Latency = 1;
    189   let NumMicroOps = 2;
    190 }
    191 
    192 def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
    193   let Latency = 4;
    194   let NumMicroOps = 2;
    195 }
    196 def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
    197   let Latency = 5;
    198   let NumMicroOps = 2;
    199 }
    200 
    201 def FalkorWr_2XYZ_2cyc   : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
    202   let Latency = 2;
    203   let NumMicroOps = 2;
    204 }
    205 
    206 def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
    207   let Latency = 0;
    208   let NumMicroOps = 2;
    209 }
    210 
        // The two X+Z writes are the only ones in this section that set
        // ResourceCycles. The list has one entry per listed resource, in the same
        // order: 2 for FalkorUnitX, and the full latency (8 or 11) for FalkorUnitZ.
    211 def FalkorWr_1X_1Z_8cyc  : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
    212   let Latency = 8;
    213   let NumMicroOps = 2;
    214   let ResourceCycles = [2, 8];
    215 }
    216 
    217 def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
    218   let Latency = 11;
    219   let NumMicroOps = 2;
    220   let ResourceCycles = [2, 11];
    221 }
    222 
    223 def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
    224   let Latency = 3;
    225   let NumMicroOps = 2;
    226 }
    227 
        // "1none": two micro-ops are counted but only FalkorUnitLD is listed, so the
        // second micro-op is not charged to any resource.
    228 def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
    229   let Latency = 3;
    230   let NumMicroOps = 2;
    231 }
    232 
    233 def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
    234   let Latency = 0;
    235   let NumMicroOps = 2;
    236 }
    237 
    238 def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
    239   let Latency = 0;
    240   let NumMicroOps = 2;
    241 }
    242 
    243 //===----------------------------------------------------------------------===//
    244 // Define 3 micro-op types
        // The first two writes list ST, SD and LD once each and set NumMicroOps = 3;
        // they differ only in Latency (0 versus 3).
    245 
    246 def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
    247                                                FalkorUnitLD]> {
    248   let Latency = 0;
    249   let NumMicroOps = 3;
    250 }
    251 
    252 def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
    253                                                FalkorUnitLD]> {
    254   let Latency = 3;
    255   let NumMicroOps = 3;
    256 }
    257 
        // Three-micro-op writes whose micro-ops all issue to the VXVY resource.
        // As with the 2VXVY, 4VXVY and 5VXVY writes in this file, FalkorUnitVXVY is
        // listed once per micro-op so the resource list agrees with the "3VXVY"
        // name and with NumMicroOps = 3. (Micro-ops that consume no resource are
        // spelled "none" in the write name, which is not the case here.)
        // The four defs differ only in Latency.
    258 def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
    259   let Latency = 3;
    260   let NumMicroOps = 3;
    261 }
    262 
    263 def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
    264   let Latency = 4;
    265   let NumMicroOps = 3;
    266 }
    267 
    268 def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
    269   let Latency = 5;
    270   let NumMicroOps = 3;
    271 }
    272 
    273 def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
    274   let Latency = 6;
    275   let NumMicroOps = 3;
    276 }
    277 
        // One LD micro-op plus two VXVY micro-ops (NumMicroOps = 3). FalkorUnitVXVY
        // is listed once per VXVY micro-op, matching how FalkorWr_1LD_3VXVY_4cyc and
        // FalkorWr_1LD_4VXVY_4cyc spell out their resources.
    278 def FalkorWr_1LD_2VXVY_4cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY]> {
    279   let Latency = 4;
    280   let NumMicroOps = 3;
    281 }
    282 
        // "1none": three micro-ops are counted but only the two LD resources are
        // listed, so the third micro-op is not charged to any resource.
    283 def FalkorWr_2LD_1none_3cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
    284   let Latency = 3;
    285   let NumMicroOps = 3;
    286 }
    287 
    288 def FalkorWr_3LD_3cyc        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
    289                                               FalkorUnitLD]> {
    290   let Latency = 3;
    291   let NumMicroOps = 3;
    292 }
    293 
    294 def FalkorWr_2LD_1Z_3cyc     : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
    295                                              FalkorUnitZ]> {
    296   let Latency = 3;
    297   let NumMicroOps = 3;
    298 }
    299 
        // Store writes with an extra XYZ micro-op; these are the NoSchedPred
        // (fallback) alternatives of FalkorWr_STRro and FalkorWr_STRVro below.
    300 def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
    301   let Latency = 0;
    302   let NumMicroOps = 3;
    303 }
    304 def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
    305   let Latency = 0;
    306   let NumMicroOps = 3;
    307 }
    308 //===----------------------------------------------------------------------===//
    309 // Define 4 micro-op types
        // All writes in this section set NumMicroOps = 4. Except where noted, the
        // resource list has four entries, one per micro-op.
    310 
        // Two VX plus two VY micro-ops, listed as alternating VX, VY entries; the
        // defs differ only in Latency (14, 20, 21, 24).
    311 def FalkorWr_2VX_2VY_14cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
    312                                              FalkorUnitVX, FalkorUnitVY]> {
    313   let Latency = 14;
    314   let NumMicroOps = 4;
    315 }
    316 
    317 def FalkorWr_2VX_2VY_20cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
    318                                              FalkorUnitVX, FalkorUnitVY]> {
    319   let Latency = 20;
    320   let NumMicroOps = 4;
    321 }
    322 
    323 def FalkorWr_2VX_2VY_21cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
    324                                              FalkorUnitVX, FalkorUnitVY]> {
    325   let Latency = 21;
    326   let NumMicroOps = 4;
    327 }
    328 
    329 def FalkorWr_2VX_2VY_24cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
    330                                              FalkorUnitVX, FalkorUnitVY]> {
    331   let Latency = 24;
    332   let NumMicroOps = 4;
    333 }
    334 
        // Four micro-ops on the VXVY resource; latencies 2, 3, 4 and 6.
    335 def FalkorWr_4VXVY_2cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
    336                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    337   let Latency = 2;
    338   let NumMicroOps = 4;
    339 }
    340 def FalkorWr_4VXVY_3cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
    341                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    342   let Latency = 3;
    343   let NumMicroOps = 4;
    344 }
    345 def FalkorWr_4VXVY_4cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
    346                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    347   let Latency = 4;
    348   let NumMicroOps = 4;
    349 }
    350 def FalkorWr_4VXVY_6cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
    351                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    352   let Latency = 6;
    353   let NumMicroOps = 4;
    354 }
    355 
    356 def FalkorWr_4LD_3cyc      : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
    357                                             FalkorUnitLD, FalkorUnitLD]> {
    358   let Latency = 3;
    359   let NumMicroOps = 4;
    360 }
    361 
    362 def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
    363                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    364   let Latency = 4;
    365   let NumMicroOps = 4;
    366 }
    367 
        // "2none": four micro-ops are counted but only the two LD resources are
        // listed, so two micro-ops are not charged to any resource.
    368 def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
    369   let Latency = 3;
    370   let NumMicroOps = 4;
    371 }
    372 
    373 def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
    374                                               FalkorUnitSD, FalkorUnitLD]> {
    375   let Latency = 3;
    376   let NumMicroOps = 4;
    377 }
    378 
    379 def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
    380                                            FalkorUnitST, FalkorUnitVSD]> {
    381   let Latency = 0;
    382   let NumMicroOps = 4;
    383 }
    384 
    385 //===----------------------------------------------------------------------===//
    386 // Define 5 micro-op types
        // All writes in this section set NumMicroOps = 5. Each lists five resource
        // entries except the "1none" write, which lists four.
    387 
    388 def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
    389                                             FalkorUnitVXVY, FalkorUnitVXVY,
    390                                             FalkorUnitVXVY]> {
    391   let Latency = 4;
    392   let NumMicroOps = 5;
    393 }
        // "1none": two LD and two VXVY entries are listed for five micro-ops, so one
        // micro-op is not charged to any resource.
    394 def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
    395                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    396   let Latency = 4;
    397   let NumMicroOps = 5;
    398 }
    399 def FalkorWr_5VXVY_7cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
    400                                             FalkorUnitVXVY, FalkorUnitVXVY,
    401                                             FalkorUnitVXVY]> {
    402   let Latency = 7;
    403   let NumMicroOps = 5;
    404 }
        // One XYZ (or VXVY) micro-op followed by two ST/VSD pairs.
    405 def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
    406                                                 FalkorUnitVSD, FalkorUnitST,
    407                                                 FalkorUnitVSD]> {
    408   let Latency = 0;
    409   let NumMicroOps = 5;
    410 }
    411 def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
    412                                                   FalkorUnitVSD, FalkorUnitST,
    413                                                   FalkorUnitVSD]> {
    414   let Latency = 0;
    415   let NumMicroOps = 5;
    416 }
    417 //===----------------------------------------------------------------------===//
    418 // Define 6 micro-op types
        // All writes in this section set NumMicroOps = 6.
    419 
        // "2none": two LD and two VXVY entries are listed for six micro-ops, so two
        // micro-ops are not charged to any resource.
    420 def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
    421                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
    422   let Latency = 4;
    423   let NumMicroOps = 6;
    424 }
    425 
        // Two repetitions of (XYZ, ST, VSD).
    426 def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
    427                                                 FalkorUnitVSD, FalkorUnitXYZ,
    428                                                 FalkorUnitST, FalkorUnitVSD]> {
    429   let Latency = 0;
    430   let NumMicroOps = 6;
    431 }
    432 
        // Two repetitions of (VXVY, ST, VSD).
    433 def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
    434                                                  FalkorUnitVSD, FalkorUnitVXVY,
    435                                                  FalkorUnitST, FalkorUnitVSD]> {
    436   let Latency = 0;
    437   let NumMicroOps = 6;
    438 }
    439 
        // Three repetitions of (ST, VSD).
    440 def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
    441                                            FalkorUnitST, FalkorUnitVSD,
    442                                            FalkorUnitST, FalkorUnitVSD]> {
    443   let Latency = 0;
    444   let NumMicroOps = 6;
    445 }
    446 
    447 //===----------------------------------------------------------------------===//
    448 // Define 8 micro-op types
        // Both writes set NumMicroOps = 8 and list eight resource entries.
    449 
        // Two repetitions of (LD, LD, VXVY, VXVY).
    450 def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
    451                                              FalkorUnitVXVY, FalkorUnitVXVY,
    452                                              FalkorUnitLD, FalkorUnitLD,
    453                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
    454   let Latency = 4;
    455   let NumMicroOps = 8;
    456 }
    457 
        // Four repetitions of (ST, VSD).
    458 def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
    459                                            FalkorUnitST, FalkorUnitVSD,
    460                                            FalkorUnitST, FalkorUnitVSD,
    461                                            FalkorUnitST, FalkorUnitVSD]> {
    462   let Latency = 0;
    463   let NumMicroOps = 8;
    464 }
    465 
    466 //===----------------------------------------------------------------------===//
    467 // Define 9 micro-op types
        // Both writes set NumMicroOps = 9 and list the same nine resources (four LD,
        // four VXVY, one XYZ). They differ only in where FalkorUnitXYZ sits in the
        // list, which is also the order spelled out in each write's name.
    468 
        // XYZ listed after the second LD pair.
    469 def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
    470                                              FalkorUnitLD, FalkorUnitVXVY,
    471                                              FalkorUnitVXVY, FalkorUnitLD,
    472                                              FalkorUnitLD, FalkorUnitXYZ,
    473                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
    474   let Latency = 4;
    475   let NumMicroOps = 9;
    476 }
    477 
        // XYZ listed before the second LD pair.
    478 def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
    479                                              FalkorUnitLD, FalkorUnitVXVY,
    480                                              FalkorUnitVXVY, FalkorUnitXYZ,
    481                                              FalkorUnitLD, FalkorUnitLD,
    482                                              FalkorUnitVXVY, FalkorUnitVXVY]> {
    483   let Latency = 4;
    484   let NumMicroOps = 9;
    485 }
    486 
    487 //===----------------------------------------------------------------------===//
    488 // Define 10 micro-op types
        // Ten resource entries for ten micro-ops: two (VXVY, ST, VSD) triples
        // followed by two (ST, VSD) pairs.
    489 
    490 def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
    491                                                  FalkorUnitVSD, FalkorUnitVXVY,
    492                                                  FalkorUnitST, FalkorUnitVSD,
    493                                                  FalkorUnitST, FalkorUnitVSD,
    494                                                  FalkorUnitST, FalkorUnitVSD]> {
    495   let Latency = 0;
    496   let NumMicroOps = 10;
    497 }
    498 
    499 //===----------------------------------------------------------------------===//
    500 // Define 12 micro-op types
        // Twelve resource entries for twelve micro-ops: four repetitions of
        // (VXVY, ST, VSD).
    501 
    502 def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
    503                                                  FalkorUnitVSD, FalkorUnitVXVY,
    504                                                  FalkorUnitST, FalkorUnitVSD,
    505                                                  FalkorUnitVXVY, FalkorUnitST,
    506                                                  FalkorUnitVSD, FalkorUnitVXVY,
    507                                                  FalkorUnitST, FalkorUnitVSD]> {
    508   let Latency = 0;
    509   let NumMicroOps = 12;
    510 }
    511 
    512 // Forwarding logic is modeled for multiply add/accumulate and
    513 // load/store base register increment.
    514 // -----------------------------------------------------------------------------
        // Each SchedReadAdvance pairs a cycle count with the list of writes it is
        // restricted to. All writes named here are the specially-tagged defs above
        // (IMUL32/IMUL64/VMUL32/FMUL32/FMUL64 and LdInc/StInc).
        // FalkorReadIMA32 advances by 3 over a write whose Latency is 4 (see the
        // note on FalkorWr_IMUL32_1X_2cyc's name).
    515 def FalkorReadIMA32  : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
    516 def FalkorReadIMA64  : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
    517 def FalkorReadVMA    : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
    518 def FalkorReadFMA32  : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
    519 def FalkorReadFMA64  : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
    520 
        // Base-register increment forwarding: advance by 1 over the zero-micro-op
        // LdInc/StInc writes (Latency = 2).
    521 def FalkorReadIncLd  : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>;
    522 def FalkorReadIncSt  : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>;
    523 
    524 // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
    525 // -----------------------------------------------------------------------------
        // FalkorImmZPred: true when operand 1 is an immediate whose value is 0.
    526 def FalkorImmZPred    : SchedPredicate<[{MI->getOperand(1).isImm() &&
    527                                          MI->getOperand(1).getImm() == 0}]>;
        // FalkorOp1ZrReg: true when operand 1 is the WZR or XZR register. The
        // isReg() guard mirrors the isImm() guard in FalkorImmZPred so that getReg()
        // is never called on a non-register operand.
    528 def FalkorOp1ZrReg    : SchedPredicate<[{MI->getOperand(1).isReg() &&
    529                                         (MI->getOperand(1).getReg() == AArch64::WZR ||
    530                                          MI->getOperand(1).getReg() == AArch64::XZR)}]>;
        // FalkorShiftExtFastPred: defers entirely to TII->isFalkorShiftExtFast(*MI).
    531 def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
    532 
        // Each SchedWriteVariant below lists two SchedVar alternatives: a
        // predicate-guarded write first, then a NoSchedPred entry naming the write
        // to use otherwise. In the *ro / ADDSUBsx variants the NoSchedPred write has
        // one more FalkorUnitXYZ micro-op than the FalkorShiftExtFastPred write.
    533 def FalkorWr_FMOV  : SchedWriteVariant<[
    534                        SchedVar<FalkorOp1ZrReg,  [FalkorWr_1none_0cyc]>,
    535                        SchedVar<NoSchedPred,     [FalkorWr_1GTOV_1cyc]>]>;
    536 
    537 def FalkorWr_MOVZ  : SchedWriteVariant<[
    538                        SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
    539                        SchedVar<NoSchedPred,    [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
    540 
    541 
    542 def FalkorWr_ADDSUBsx : SchedWriteVariant<[
    543                           SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
    544                           SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2cyc]>]>;
    545 
    546 def FalkorWr_LDRro : SchedWriteVariant<[
    547                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
    548                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_4cyc]>]>;
    549 
    550 def FalkorWr_LDRSro : SchedWriteVariant<[
    551                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
    552                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_5cyc]>]>;
    553 
    554 def FalkorWr_ORRi : SchedWriteVariant<[
    555                       SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
    556                       SchedVar<NoSchedPred,    [FalkorWr_1XYZ_1cyc]>]>;
    557 
    558 def FalkorWr_PRFMro : SchedWriteVariant<[
    559                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
    560                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1ST_4cyc]>]>;
    561 
    562 def FalkorWr_STRVro : SchedWriteVariant<[
    563                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
    564                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
    565 
    566 def FalkorWr_STRQro : SchedWriteVariant<[
    567                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
    568                         SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
    569 
    570 def FalkorWr_STRro : SchedWriteVariant<[
    571                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
    572                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
    573 
    574 //===----------------------------------------------------------------------===//
    575 // Specialize the coarse model by associating instruction groups with the
    576 // subtarget-defined types. As the model is refined, this will override most
    577 // of the earlier mappings.
    578 
    579 // Miscellaneous
    580 // -----------------------------------------------------------------------------
    581 
    582 // FIXME: This could be better modeled by looking at the regclasses of the operands.
        // Until then every COPY is mapped to a single XYZ micro-op with 1-cycle latency.
    583 def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
    584 
    585 // SIMD Floating-point Instructions
    586 // -----------------------------------------------------------------------------
        // Each anonymous InstRW maps a set of instructions to a Falkor write (and,
        // for the multiply-accumulate rows at the end, a SchedReadAdvance as well).
        // Instructions are selected either by exact name (instrs ...) or by a
        // ^...$-anchored regular expression over instruction names (instregex ...).
        // Rows are grouped by the write they use.
    587 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)v2f32$")>;
    588 
    589 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
    590 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FAC(GE|GT)(32|64)$")>;
    591 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
    592 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
    593 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
    594 
    595 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
    596 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FABD|FADD|FSUB)v2f32$")>;
    597 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
    598 
    599 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
    600 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTXNv1i64)>;
    601 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
    602 
        // Multiplies use the FMUL32/FMUL64-tagged writes so that FalkorReadFMA32 and
        // FalkorReadFMA64 can name them.
    603 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
    604                                       (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
    605 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
    606                                       (instrs FMULX32)>;
    607 
    608 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
    609                                       (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
    610 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
    611                                       (instrs FMULX64)>;
    612 
    613 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
    614 
    615 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
    616 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
    617 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs FCVTLv4i16, FCVTLv2i32)>;
    618 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
    619 
    620 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>;
    621 def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>;
    622 
    623 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
    624 
    625 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
    626 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs FCVTLv8i16, FCVTLv4i32)>;
    627 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
    628 
    629 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
    630                                       (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
    631 
    632 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
    633                                       (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
    634 
    635 def : InstRW<[FalkorWr_3VXVY_4cyc],   (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
    636 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
    637 
    638 def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>;
    639 def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>;
    640 def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>;
    641 def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>;
    642 
        // Multiply-accumulate rows: the write is paired with the SchedReadAdvance
        // that names it (FalkorReadVMA for MLA/MLS, FalkorReadFMA32/FalkorReadFMA64
        // for FMLA/FMLS).
    643 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
    644                                       (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
    645 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
    646                                       (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
    647 
    648 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
    649                                       (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
    650 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
    651                                       (instregex "^FML(A|S)v1i64_indexed$")>;
    652 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
    653                                       (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
    654 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
    655                                       (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
    656 
    657 // SIMD Integer Instructions
    658 // -----------------------------------------------------------------------------
    659 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
    660 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs ADDPv2i64p)>;
    661 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
    662 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
    663 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
    664 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
    665 
    666 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
    667 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHLv1i64$")>;
    668 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
    669 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHRd$")>;
    670 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
    671 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
    672 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
    673 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
    674 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs PMULv8i8)>;
    675 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
    676 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHLd$")>;
     677 
         // 64-bit vector and scalar saturating, rounding, accumulating and narrowing
         // ops, plus the 4-element across-lane reductions: one VX/VY micro-op with
         // 3-cycle latency.
     678 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
     679 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
     680 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
     681 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i16v$")>;
     682 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
     683 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
     684 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
     685 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
     686 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
     687 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
     688 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
     689 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHRd$")>;
     690 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
     691 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
     692 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
     693 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs ADDVv4i16v)>;
     694 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
     695 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
     696 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
     697 
         // 8-element across-lane reductions and 64-bit/scalar multiplies: one VX/VY
         // micro-op with 4-cycle latency. The multiply records use the VMUL32 write
         // variants; the accumulating SQRDML(A|S)H forms also read through
         // FalkorReadVMA (presumably accumulator forwarding - confirm at definition).
     698 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)ADDLVv8i8v$")>;
     699 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
     700 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs ADDVv8i8v)>;
     701 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
     702                                       (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
     703 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
     704                                       (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
     705 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
     706                                       (instregex "^SQDMULL(i16|i32)$")>;
     707 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
     708                                       (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
     709 
         // 16-element min/max reduction: one VX/VY micro-op with 5-cycle latency.
     710 def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
    711 
    712 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs ADDVv4i32v)>;
    713 
    714 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs ADDVv8i16v)>;
    715 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(ADD|SUB)HNv.*$")>;
    716 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
    717 
    718 def : InstRW<[FalkorWr_2VXVY_5cyc],   (instrs ADDVv16i8v)>;
    719 
    720 def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
    721 def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^R(ADD|SUB)HNv.*$")>;
     722 
         // 128-bit counterparts of the 1-cycle add/sub/logical ops: two VX/VY
         // micro-ops with 1-cycle latency.
     723 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
     724 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs ADDPv2i64)>; // sz==11
     725 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
     726 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
     727 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
     728 
         // 128-bit shifts, halving add/sub, min/max, compares and the long
         // (widening) add/sub/shift forms: two VX/VY micro-ops with 2-cycle latency.
         // Note that the ^SHLL pattern below is distinct from ^(S|U)SHLL above because
         // both are anchored at the start of the opcode name.
     729 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)ADDLv.*$")>;
     730 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
     731 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
     732 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
     733 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SUBLv.*$")>;
     734 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
     735 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
     736 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
     737 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
     738 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
     739 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^PMULL(v8i8|v16i8)$")>;
     740 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
     741 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
     742 
         // 128-bit saturating, rounding, accumulating and narrowing ops, plus 64-bit
         // polynomial multiply long: two VX/VY micro-ops with 3-cycle latency.
     743 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
     744 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
     745 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABDLv.*$")>;
     746 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
     747 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
     748 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
     749 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
     750 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
     751 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
     752 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^PMULL(v1i64|v2i64)$")>;
     753 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
     754 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
     755 
         // 128-bit multiplies and saturating doubling multiplies: two micro-ops,
         // 4-cycle latency, using the VMUL32 write variants. The accumulating
         // SQRDML(A|S)H forms additionally read through FalkorReadVMA.
     756 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
     757                                       (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
     758 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
     759                                       (instregex "^SQDMULLv.*$")>;
     760 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
     761                                       (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
     762 
         // Long across-lane add reductions: three micro-ops, latency grows with the
         // number of elements (3, 5 and 6 cycles).
     763 def : InstRW<[FalkorWr_3VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i32v$")>;
     764 
     765 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(S|U)ADDLVv8i16v$")>;
     766 
     767 def : InstRW<[FalkorWr_3VXVY_6cyc],   (instregex "^(S|U)ADDLVv16i8v$")>;
     768 
         // Wide add/sub, absolute-difference-accumulate long and 128-bit
         // absolute-difference-accumulate: four VX/VY micro-ops.
     769 def : InstRW<[FalkorWr_4VXVY_2cyc],   (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
     770 
     771 def : InstRW<[FalkorWr_4VXVY_3cyc],   (instregex "^(S|U)ABALv.*$")>;
     772 
     773 def : InstRW<[FalkorWr_4VXVY_4cyc],   (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
     774 
         // Saturating doubling multiply-accumulate long. Scalar and scalar-indexed
         // forms use one micro-op, vector forms (names starting v2/v4/v8) use two.
         // Both read through FalkorReadVMA.
     775 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
     776                                       (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
     777 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
     778                                       (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
    779 
     780 // SIMD Load Instructions
     781 // -----------------------------------------------------------------------------
         // Every load in this section is listed in two forms. The plain form has a
         // single write (the loaded data) and the FalkorReadIncLd read. The _POST form
         // prepends FalkorWr_LdInc_none_2cyc, which is defined near the top of this
         // file as a zero-micro-op write with 2-cycle latency; presumably it covers the
         // written-back base register - confirm against the instruction operand order.
         //
         // Single load micro-op, 3-cycle latency.
     782 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
     783 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
     784                                                          (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
     785 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
     786 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
     787                                                          (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
     788 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instrs LD2i64)>;
     789 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
     790                                                          (instrs LD2i64_POST)>;
     791 
         // Sub-64-bit single-lane loads: one load micro-op plus one VX/VY micro-op,
         // 4-cycle latency.
     792 def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
     793 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
     794                                                          (instregex "^LD1i(8|16|32)_POST$")>;
     795 
         // Two-register 64-bit loads: one load micro-op plus one micro-op with no
         // pipe designator (1none), 3-cycle latency.
     796 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
     797 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
     798                                                          (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
     799 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
     800 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
     801                                                          (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
     802 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
     803 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
     804                                                          (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
     805 
         // Two-register 128-bit loads and 64-bit-lane LD3/LD4: two load micro-ops,
         // 3-cycle latency.
     806 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
     807 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
     808                                                          (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
     809 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
     810 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
     811                                                          (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
     812 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
     813 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
     814                                                          (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
     815 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instrs LD3i64)>;
     816 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
     817                                                          (instrs LD3i64_POST)>;
     818 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instrs LD4i64)>;
     819 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
     820                                                          (instrs LD4i64_POST)>;
     821 
         // Remaining SIMD loads. As elsewhere in this section, each _POST record
         // prepends FalkorWr_LdInc_none_2cyc (a zero-micro-op, 2-cycle write defined
         // near the top of this file) to the write list of its non-_POST counterpart.
         //
         // Two-register sub-64-bit lane loads: one load plus two VX/VY micro-ops.
     822 def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
     823 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
     824                                                          (instregex "^LD2i(8|16|32)_POST$")>;
     825 
         // Three-register 64-bit loads: two load micro-ops plus one 1none micro-op.
     826 def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
     827 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
     828                                                          (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
     829 def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
     830 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
     831                                                          (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
     832 
         // Three-register 128-bit loads: three load micro-ops.
     833 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
     834 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
     835                                                          (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
     836 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instrs LD3Threev2d)>;
     837 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
     838                                                          (instrs LD3Threev2d_POST)>;
     839 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
     840 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
     841                                                          (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
     842 
         // Three-register sub-64-bit lane loads: one load plus three VX/VY micro-ops.
     843 def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
     844 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
     845                                                          (instregex "^LD3i(8|16|32)_POST$")>;
     846 
         // Four-register 64-bit loads: two load micro-ops plus two 1none micro-ops.
     847 def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
     848 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
     849                                                          (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
     850 def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
     851 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
     852                                                          (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
     853 
         // Four-register 128-bit loads: four load micro-ops.
     854 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
     855 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
     856                                                          (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
     857 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instrs LD4Fourv2d)>;
     858 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
     859                                                          (instrs LD4Fourv2d_POST)>;
     860 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
     861 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
     862                                                          (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
     863 
         // Four-register sub-64-bit lane loads: one load plus four VX/VY micro-ops.
     864 def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
     865 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
     866                                                          (instregex "^LD4i(8|16|32)_POST$")>;
     867 
         // De-interleaving LD3/LD4 on 64-bit vectors: load micro-ops combined with
         // VX/VY micro-ops, 4-cycle latency.
     868 def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
     869                                                          (instregex "^LD3Threev(8b|4h|2s)$")>;
     870 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
     871                                                          (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
     872 
     873 def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
     874                                                          (instregex "^LD4Fourv(8b|4h|2s)$")>;
     875 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
     876                                                          (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
     877 
         // De-interleaving LD3/LD4 on 128-bit vectors. Unlike the other groups, the
         // _POST forms here use a different data write than the non-_POST forms: one
         // whose name includes an extra 1XYZ micro-op.
     878 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
     879                                                          (instregex "^LD3Threev(16b|8h|4s)$")>;
     880 
     881 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
     882                                                          (instregex "^LD4Fourv(16b|8h|4s)$")>;
     883 
     884 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
     885                                                          (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
     886 
     887 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
     888                                                          (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
    889 
     890 // Arithmetic and Logical Instructions
     891 // -----------------------------------------------------------------------------
         // Scalar integer ALU ops: one micro-op on any of the X/Y/Z pipes with 1-cycle
         // latency. In the patterns, (W|X) selects the 32-/64-bit register form and the
         // trailing r/i/s/x letters select the operand form of the opcode name.
     892 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
     893 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADC(S)?(W|X)r$")>;
     894 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADD(S)?(W|X)r(r|i)$")>;
     895 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
     896 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
     897 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^BIC(S)?(W|X)r(r|s)$")>;
     898 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EON(W|X)r(r|s)$")>;
     899 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EOR(W|X)r(i|r|s)$")>;
     900 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORN(W|X)r(r|s)$")>;
         // ORR-immediate uses its own write type rather than the fixed 1XYZ write;
         // presumably one of the WriteVariant types mentioned in the file header
         // (immediate zero) - confirm at the FalkorWr_ORRi definition.
     901 def : InstRW<[FalkorWr_ORRi],         (instregex "^ORR(W|X)ri$")>;
     902 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORR(W|X)r(r|s)$")>;
     903 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SBC(S)?(W|X)r$")>;
     904 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SUB(S)?(W|X)r(r|i)$")>;
         // Shifted- and extended-register ADD/SUB share the FalkorWr_ADDSUBsx write;
         // presumably the LSLFast WriteVariant mentioned in the file header - confirm.
     905 def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
     906 def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
    907 
    908 // SIMD Miscellaneous Instructions
    909 // -----------------------------------------------------------------------------
    910 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
    911 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
    912 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^CPY(i8|i16|i32|i64)$")>;
    913 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^INSv(i8|i16)(gpr|lane)$")>;
    914 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^(S|U)MOVv.*$")>;
    915 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIF|BIT|BSL)v8i8$")>;
    916 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs EXTv8i8)>;
    917 def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
    918 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs TBLv8i8One)>;
    919 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs NOTv8i8)>;
    920 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^REV(16|32|64)v.*$")>;
    921 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
    922 
    923 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
    924 
    925 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "(S|U)QXTU?Nv.*$")>;
    926 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
    927 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPXv1i32, FRECPXv1i64)>;
    928 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs URECPEv2i32, URSQRTEv2i32)>;
    929 
    930 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
    931                                       (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
    932 
    933 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
    934                                       (instrs FRECPS64, FRSQRTS64)>;
    935 
    936 def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
    937                                       (instregex "^INSv(i32|i64)(gpr|lane)$")>;
    938 def : InstRW<[FalkorWr_2GTOV_1cyc],   (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
    939 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIF|BIT|BSL)v16i8$")>;
    940 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs EXTv16i8)>;
    941 def : InstRW<[FalkorWr_2VXVY_0cyc],   (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
    942 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs NOTv16i8)>;
    943 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs TBLv16i8One)>;
    944 
    945 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
    946 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
    947 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs URECPEv4i32, URSQRTEv4i32)>;
    948 
    949 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs TBLv8i8Two)>;
    950 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^TBX(v8|v16)i8One$")>;
    951 
    952 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
    953                                       (instrs FRECPSv4f32, FRSQRTSv4f32)>;
    954 
    955 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
    956                                       (instrs FRECPSv2f64, FRSQRTSv2f64)>;
    957 
    958 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBL(v8i8Three|v16i8Two)$")>;
    959 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBX(v8i8Two|v16i8Two)$")>;
    960 
    961 def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBL(v8i8Four|v16i8Three)$")>;
    962 def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBX(v8i8Three|v16i8Three)$")>;
    963 
    964 def : InstRW<[FalkorWr_5VXVY_7cyc],   (instrs TBLv16i8Four)>;
    965 def : InstRW<[FalkorWr_5VXVY_7cyc],   (instregex "^TBX(v8i8Four|v16i8Four)$")>;
    966 
     967 // SIMD Store Instructions
     968 // -----------------------------------------------------------------------------
     969 
         // Scalar FP/SIMD register stores and store-pairs. Each store micro-op pairs
         // a VSD with an ST (e.g. 1VSD_1ST), and the writes are 0cyc. The read list
         // is ReadDefault once per stored register followed by FalkorReadIncSt
         // (presumably the base address read - confirm against operand order).
         // Pre/post-indexed forms prepend FalkorWr_StInc_none_2cyc, defined near the
         // top of this file with no resources and Latency = 2.
     970 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
     971                                        (instregex "^STR(Q|D|S|H|B)ui$")>;
     972 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
     973                                        (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
         // Register-offset stores use dedicated write types (FalkorWr_STRVro and,
         // for Q registers, FalkorWr_STRQro) instead of a fixed micro-op write.
     974 def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
     975                                        (instregex "^STR(D|S|H|B)ro(W|X)$")>;
         // Q-register pairs need two VSD/ST micro-op pairs; D/S pairs need one.
     976 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
     977                                        (instregex "^STPQi$")>;
     978 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
     979                                        (instregex "^STPQ(post|pre)$")>;
     980 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
     981                                        (instregex "^STP(D|S)(i)$")>;
     982 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
     983                                        (instregex "^STP(D|S)(post|pre)$")>;
     984 def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
     985                                        (instregex "^STRQro(W|X)$")>;
         // Unscaled-offset and non-temporal-pair stores reuse the same writes as the
         // corresponding STR/STP immediate forms above.
     986 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
     987                                        (instregex "^STUR(Q|D|S|B|H)i$")>;
     988 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
     989                                        (instrs STNPDi, STNPSi)>;
     990 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
     991                                        (instrs STNPQi)>;
     992 
         // Structure stores needing a single VSD/ST micro-op pair.
     993 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
     994                                        (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
     995 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
     996                                        (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
     997 def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
     998                                        (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
         // NOTE(review): the next two _POST records use FalkorWr_1XYZ_1cyc as their
         // first write, whereas the ST1 _POST record above uses
         // FalkorWr_StInc_none_2cyc. They match the FIXME-annotated records further
         // down, so presumably the same caveat (overly conservative for the immediate
         // POST case) applies here as well - confirm.
     999 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
    1000                                        (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
    1001 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
    1002                                        (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;
    1003 
         // Structure stores needing two VSD/ST micro-op pairs.
    1004 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1005                                        (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
    1006 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1007                                        (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
    1008 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1009                                        (instregex "^ST3(i8|i16|i32|i64)$")>;
    1010 def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1011                                        (instregex "^ST4(i8|i16|i32|i64)$")>;
    1012 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1013 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1014                                        (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
    1015 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1016 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1017                                        (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
    1018 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1019 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1020                                        (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
    1021 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1022 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
    1023                                        (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
    1024 
         // Larger structure stores. The interleaving ST3/ST4 forms add VX/VY micro-ops
         // to the VSD/ST pairs; the ST1 multi-register and 2d forms use VSD/ST pairs
         // only. Every _POST record prepends FalkorWr_1XYZ_1cyc as its first write.
    1025 def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1026                                        (instregex "^ST3Three(v8b|v4h|v2s)$")>;
    1027 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1028 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1029                                        (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;
    1030 
         // Three 128-bit registers without interleaving work: three VSD/ST pairs.
    1031 def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
    1032                                        (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
    1033 def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
    1034                                        (instrs ST3Threev2d)>;
    1035 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1036 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
    1037                                        (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
    1038 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1039 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
    1040                                        (instrs ST3Threev2d_POST)>;
    1041 
         // Interleaving ST4 on 64-bit vectors: two VX/VY plus two ST/VSD pairs.
    1042 def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1043                                        (instregex "^ST4Four(v8b|v4h|v2s)$")>;
    1044 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1045 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1046                                        (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;
    1047 
         // Four 128-bit registers without interleaving work: four VSD/ST pairs.
    1048 def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
    1049                                        (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
    1050 def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
    1051                                        (instrs ST4Fourv2d)>;
    1052 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1053 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
    1054                                        (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
    1055 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1056 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
    1057                                        (instrs ST4Fourv2d_POST)>;
    1058 
         // Interleaving ST3/ST4 on 128-bit vectors: four ST/VSD pairs plus two (ST3)
         // or four (ST4) VX/VY micro-ops.
    1059 def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1060                                        (instregex "^ST3Three(v16b|v8h|v4s)$")>;
    1061 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1062 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1063                                        (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
    1064 
    1065 def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1066                                        (instregex "^ST4Four(v16b|v8h|v4s)$")>;
    1067 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
    1068 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
    1069                                        (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
   1070 
   1071 // Branch Instructions
   1072 // -----------------------------------------------------------------------------
        // Maps branch, call and return opcodes to Falkor write types. Every entry
        // here uses a "_0cyc" write; the pipe designators in the names (Z, ZB, XYZB,
        // Z plus XY) follow the naming convention described in the file header.
        // NOTE(review): "1none" presumably means one micro-op that needs no
        // execution pipe -- confirm against the FalkorWr_1none_0cyc definition.
   1073 def : InstRW<[FalkorWr_1none_0cyc],   (instrs B, TCRETURNdi)>;
   1074 def : InstRW<[FalkorWr_1Z_0cyc],      (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
   1075 def : InstRW<[FalkorWr_1Z_0cyc],      (instrs RET_ReallyLR, TCRETURNri)>;
   1076 def : InstRW<[FalkorWr_1ZB_0cyc],     (instrs Bcc)>;
   1077 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instrs BL)>;
   1078 def : InstRW<[FalkorWr_1Z_1XY_0cyc],  (instrs BLR)>;
   1079 
   1080 // Cryptography Extensions
   1081 // -----------------------------------------------------------------------------
        // AES and SHA opcodes mapped to vector-pipe write types. Per the naming
        // convention in the file header, the trailing "#cyc" of each write name is
        // its latency and the leading fields give the micro-op count and pipe(s);
        // entries below range from 1 to 5 cycles.
   1082 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs SHA1Hrr)>;
   1083 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs AESIMCrr, AESMCrr)>;
   1084 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs AESDrr, AESErr)>;
   1085 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
   1086 def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
   1087 def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
   1088 def : InstRW<[FalkorWr_4VXVY_3cyc],   (instrs SHA256SU1rrr)>;
   1089 
   1090 // FP Load Instructions
   1091 // -----------------------------------------------------------------------------
        // Loads into FP/SIMD registers. Single-register forms use FalkorWr_1LD_3cyc
        // (register-offset forms use FalkorWr_LDRro instead). Pair forms list a
        // second write, FalkorWr_none_3cyc, after the load write. Pre/post-indexed
        // forms list FalkorWr_LdInc_none_2cyc first; that write is defined at the top
        // of this file with Latency = 2 and NumMicroOps = 0. Every entry ends with
        // FalkorReadIncLd.
        // All patterns are explicitly anchored with ^...$ so that none of them relies
        // on instregex's implicit prefix matching.
   1092 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1093                                       (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
   1094 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1095                                       (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
   1096 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1097                                       (instregex "^LDUR(Q|D|S|H|B)i$")>;
   1098 def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
   1099                                       (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
   1100 def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1101                                       (instrs LDNPQi)>;
   1102 def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1103                                       (instrs LDPQi)>;
   1104 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1105                                       (instregex "^LDNP(D|S)i$")>;
   1106 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1107                                       (instregex "^LDP(D|S)i$")>;
   1108 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1109                                       (instregex "^LDP(D|S)(pre|post)$")>;
   1110 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1111                                       (instregex "^LDPQ(pre|post)$")>;
   1112 
   1113 // FP Data Processing Instructions
   1114 // -----------------------------------------------------------------------------
        // Scalar FP arithmetic, compare, convert and rounding opcodes. The entries
        // are grouped by the latency field of their write type, in increasing order
        // (1, 2, 3, 4 cycles, then multiply, divide/sqrt and fused multiply-add).
   1115 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCCMP(E)?(S|D)rr$")>;
   1116 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
   1117 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
   1118 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)(S|D)r$")>;
   1119 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCSEL(S|D)rrr$")>;
   1120 
   1121 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
   1122 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
   1123 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs FCVTSHr, FCVTDHr)>;
   1124 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
   1125 
   1126 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FABD(32|64)$")>;
   1127 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FADD|FSUB)(S|D)rr$")>;
   1128 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FCVTHSr, FCVTHDr)>;
   1129 
   1130 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTSDr, FCVTDSr)>;
   1131 
        // Single- and double-precision multiplies use dedicated FMUL32/FMUL64 write
        // types (5 and 6 cycles by name).
   1132 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
   1133                                       (instregex "^F(N)?MULSrr$")>;
   1134 
   1135 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
   1136                                       (instregex "^F(N)?MULDrr$")>;
   1137 
        // Divide and square root: one VX plus one VY micro-op each, with the longest
        // latencies in this section (10 to 21 cycles by name).
   1138 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>;
   1139 def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>;
   1140 def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>;
   1141 def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>;
   1142 
        // Fused multiply-add/sub reuse the FMUL32/FMUL64 write types and attach
        // FalkorReadFMA32/FalkorReadFMA64 as the third read entry.
        // NOTE(review): the third read is presumably the accumulator operand, using
        // the forwarding ReadAdvance types mentioned in the file header -- confirm.
   1143 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
   1144                                       (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
   1145 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
   1146                                       (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
   1147 
   1148 // FP Miscellaneous Instructions
   1149 // -----------------------------------------------------------------------------
        // FP moves and FP<->integer conversions. Write types named GTOV/VTOG are used
        // for the entries that move between the general and vector register files
        // (e.g. FMOV(SW|DX|DXHigh)r, (S|U)CVTF...ri); entries tagged "imm fwd" use a
        // "_0cyc" write.
   1150 def : InstRW<[FalkorWr_FMOV],         (instregex "^FMOV(WS|XD|XDHigh)r$")>;
   1151 def : InstRW<[FalkorWr_1GTOV_0cyc],   (instregex "^FMOV(S|D)i$")>; // imm fwd
   1152 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
   1153 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)(d|s)$")>;
   1154 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FMOV(SW|DX|DXHigh)r$")>;
   1155 def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
   1156 // FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
   1157 def : InstRW<[FalkorWr_2VXVY_0cyc],   (instrs FMOVD0, FMOVS0)>; // imm fwd
   1158 
        // NOTE(review): the two vector (S|U)CVTF patterns below have no trailing "$",
        // unlike the other patterns in this section, so they match by prefix.
        // Confirm whether that is intentional before tightening them.
   1159 def : InstRW<[FalkorWr_1GTOV_4cyc],   (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
   1160 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
   1161 
   1162 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
   1163 
   1164 // Load Instructions
   1165 // -----------------------------------------------------------------------------
        // Integer loads and prefetches.
        //  * Prefetches (PRFMui, PRFMl, PRFUMi) are given the ST write
        //    FalkorWr_1ST_0cyc; PRFMro(W|X) has its own FalkorWr_PRFMro write.
        //  * LDR/LDUR/LDTR/LDP/LDNP forms use FalkorWr_1LD_3cyc, while the LDRS*/
        //    LDPSW/LDTRS/LDURS forms use FalkorWr_1LD_4cyc.
        //  * Pair loads list a second "none" write after the load write.
        //  * Pre/post-indexed forms list FalkorWr_LdInc_none_2cyc first; that write is
        //    defined at the top of this file with Latency = 2 and NumMicroOps = 0.
        //  * Register-offset forms use FalkorWr_LDRro / FalkorWr_LDRSro.
        // All load entries end with FalkorReadIncLd.
   1166 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFMui, PRFMl)>;
   1167 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFUMi)>;
   1168 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1169                                       (instregex "^LDNP(W|X)i$")>;
   1170 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1171                                       (instregex "^LDP(W|X)i$")>;
   1172 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1173                                       (instregex "^LDP(W|X)(post|pre)$")>;
   1174 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1175                                       (instregex "^LDR(BB|HH|W|X)ui$")>;
   1176 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1177                                       (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
   1178 def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
   1179                                       (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
   1180 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1181                                       (instregex "^LDR(W|X)l$")>;
   1182 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1183                                       (instregex "^LDTR(B|H|W|X)i$")>;
   1184 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1185                                       (instregex "^LDUR(BB|HH|W|X)i$")>;
   1186 def : InstRW<[FalkorWr_PRFMro],       (instregex "^PRFMro(W|X)$")>;
   1187 def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
   1188                                       (instrs LDPSWi)>;
   1189 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
   1190                                       (instregex "^LDPSW(post|pre)$")>;
   1191 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
   1192                                       (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
   1193 def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
   1194                                       (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
   1195 def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
   1196                                       (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
   1197 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
   1198                                       (instrs LDRSWl)>;
   1199 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
   1200                                       (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
   1201 def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
   1202                                       (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
   1203 
   1204 // Miscellaneous Data-Processing Instructions
   1205 // -----------------------------------------------------------------------------
        // Bitfield moves, CRC32, bit-count/reverse and EXTR. All CRC32 variants
        // ("^CRC32.*$") are restricted to the X pipe by FalkorWr_1X_2cyc; EXTR is the
        // only entry here that uses two micro-ops (FalkorWr_2XYZ_2cyc).
   1206 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(S|U)?BFM(W|X)ri$")>;
   1207 def : InstRW<[FalkorWr_1X_2cyc],      (instregex "^CRC32.*$")>;
   1208 def : InstRW<[FalkorWr_1XYZ_2cyc],    (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
   1209 def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^EXTR(W|X)rri$")>;
   1210 
   1211 // Divide and Multiply Instructions
   1212 // -----------------------------------------------------------------------------
        // Integer multiply, multiply-accumulate and divide, plus the vector
        // (S|U)MULL / (S|U)MLAL / (S|U)MLSL forms. The multiply-accumulate entries
        // attach a FalkorReadIMA32 / FalkorReadIMA64 / FalkorReadVMA read.
        // NOTE(review): those reads presumably model accumulator forwarding (see the
        // ReadAdvance note in the file header) -- confirm against their definitions.
   1213 def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
   1214                                         (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
   1215 def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
   1216                                         (instregex "^M(ADD|SUB)Wrrr$")>;
   1217 
   1218 def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
   1219 def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
   1220                                         (instregex "^M(ADD|SUB)Xrrr$")>;
   1221 
        // 32-bit and 64-bit divides: one X plus one Z micro-op, 8 and 11 cycles by
        // name.
   1222 def : InstRW<[FalkorWr_1X_1Z_8cyc],     (instregex "^(S|U)DIVWr$")>;
   1223 def : InstRW<[FalkorWr_1X_1Z_11cyc],    (instregex "^(S|U)DIVXr$")>;
   1224 
   1225 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
   1226                                         (instregex "^(S|U)MULLv.*$")>;
   1227 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
   1228                                         (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
   1229 
   1230 // Move and Shift Instructions
   1231 // -----------------------------------------------------------------------------
        // Variable shifts, immediate moves and address materialisation. Entries
        // tagged "imm fwd" use a "_0cyc" write. MOVZ has its own FalkorWr_MOVZ type;
        // the file header mentions WriteVariant types for immediate zero, which may
        // be what it refers to -- confirm against its definition.
        // The MOVaddr* pseudos and LOADgot are modelled as a WriteSequence of two
        // writes each.
   1232 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
   1233 def : InstRW<[FalkorWr_1XYZ_0cyc],    (instregex "^MOVK(W|X)i$")>; // imm fwd
   1234 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instregex "^ADRP?$")>; // imm fwd
   1235 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instregex "^MOVN(W|X)i$")>; // imm fwd
   1236 def : InstRW<[FalkorWr_MOVZ],         (instregex "^MOVZ(W|X)i$")>;
   1237 def : InstRW<[FalkorWr_1XYZ_0cyc],    (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
   1238 def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
   1239                                       (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
   1240 def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
   1241                                       (instrs LOADgot)>;
   1242 
   1243 // Other Instructions
   1244 // -----------------------------------------------------------------------------
        // Barriers, exception-generating and system instructions, system-register
        // access, and the exclusive / acquire-release loads and stores.
   1245 def : InstRW<[FalkorWr_1LD_0cyc],     (instrs CLREX, DMB, DSB)>;
   1246 def : InstRW<[FalkorWr_1none_0cyc],   (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
   1247 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs SYSxt, SYSLxt)>;
   1248 def : InstRW<[FalkorWr_1Z_0cyc],      (instrs MSRpstateImm1, MSRpstateImm4)>;
   1249 
        // Acquire/exclusive loads use the same FalkorWr_1LD_3cyc write as ordinary
        // loads; the pair forms add a second FalkorWr_none_3cyc write.
   1250 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
   1251                                       (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
   1252 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
   1253                                       (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
   1254 def : InstRW<[FalkorWr_1LD_3cyc],     (instrs MRS, MOVbaseTLS)>;
   1255 
   1256 def : InstRW<[FalkorWr_1LD_1Z_3cyc],  (instrs DRPS)>;
   1257 
        // MSR shares FalkorWr_1SD_1ST_0cyc with the non-temporal pair stores
        // STNPWi/STNPXi.
   1258 def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
   1259 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
   1260                                       (instrs STNPWi, STNPXi)>;
   1261 def : InstRW<[FalkorWr_2LD_1Z_3cyc],  (instrs ERET)>;
   1262 
        // Everything whose name starts with "LDC" plus the release/exclusive stores
        // use one ST, one SD and one LD micro-op. The store entries end with
        // FalkorReadIncSt, preceded by one ReadDefault per earlier read operand.
   1263 def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
   1264 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
   1265                                       (instregex "^STLR(B|H|W|X)$")>;
   1266 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
   1267                                       (instregex "^STXP(W|X)$")>;
   1268 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
   1269                                       (instregex "^STXR(B|H|W|X)$")>;
   1270 
        // Store-release exclusive forms use two LD micro-ops and a 3-cycle write.
   1271 def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
   1272                                       (instregex "^STLXP(W|X)$")>;
   1273 def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
   1274                                       (instregex "^STLXR(B|H|W|X)$")>;
   1275 
   1276 // Store Instructions
   1277 // -----------------------------------------------------------------------------
        // Integer stores. Immediate-offset, unscaled and unprivileged forms use
        // FalkorWr_1SD_1ST_0cyc; register-offset forms use FalkorWr_STRro. Pre/post-
        // indexed forms list FalkorWr_StInc_none_2cyc first (defined at the top of
        // this file with Latency = 2). Each entry ends with FalkorReadIncSt, preceded
        // by one ReadDefault for single stores and two for pair stores.
   1278 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
   1279                                           (instregex "^STP(W|X)i$")>;
   1280 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
   1281                                           (instregex "^STP(W|X)(post|pre)$")>;
   1282 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
   1283                                           (instregex "^STR(BB|HH|W|X)ui$")>;
   1284 def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
   1285                                           (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
   1286 def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
   1287                                           (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
   1288 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
   1289                                           (instregex "^STTR(B|H|W|X)i$")>;
   1290 def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
   1291                                           (instregex "^STUR(BB|HH|W|X)i$")>;
   1292 
   1293