Home | History | Annotate | Download | only in AArch64
      1 //=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // 1. Introduction
     10 //
     11 // This file defines the machine model for Broadcom Vulcan to support
     12 // instruction scheduling and other instruction cost heuristics.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 //===----------------------------------------------------------------------===//
     17 // 2. Pipeline Description.
     18 
     19 def VulcanModel : SchedMachineModel {
     20   let IssueWidth            =   4; // 4 micro-ops dispatched at a time.
     21   let MicroOpBufferSize     = 180; // 180 entries in micro-op re-order buffer.
     22   let LoadLatency           =   4; // Optimistic load latency.
     23   let MispredictPenalty     =  12; // Extra cycles for mispredicted branch.
     24   // Determined via a mix of micro-arch details and experimentation.
     25   let LoopMicroOpBufferSize =  32; 
     26   let PostRAScheduler       =   1; // Using PostRA sched.
     27   let CompleteModel         =   1;
     28 }
     29 
     30 // Define the issue ports.
     31 
     32 // Port 0: ALU, FP/SIMD.
     33 def VulcanP0 : ProcResource<1>;
     34 
     35 // Port 1: ALU, FP/SIMD, integer mul/div.
     36 def VulcanP1 : ProcResource<1>;
     37 
     38 // Port 2: ALU, Branch.
     39 def VulcanP2 : ProcResource<1>;
     40 
     41 // Port 3: Store data.
     42 def VulcanP3 : ProcResource<1>;
     43 
     44 // Port 4: Load/store.
     45 def VulcanP4 : ProcResource<1>;
     46 
     47 // Port 5: Load/store.
     48 def VulcanP5 : ProcResource<1>;
     49 
     50 let SchedModel = VulcanModel in {
     51 
     52 // Define groups for the functional units on each
     53 // issue port.  Each group created will be used
     54 // by a WriteRes later on.
     55 //
     56 // NOTE: Some groups only contain one member.  This
     57 // is a way to create names for the various functional
     58 // units that share a single issue port.  For example,
     59 // "VulcanI1" for ALU ops on port 1 and "VulcanF1" for
     60 // FP ops on port 1.
     61 
     62 // Integer divide and multiply micro-ops only on port 1.
     63 def VulcanI1 : ProcResGroup<[VulcanP1]>;
     64 
     65 // Branch micro-ops only on port 2.
     66 def VulcanI2 : ProcResGroup<[VulcanP2]>;
     67 
     68 // ALU micro-ops on ports 0, 1, and 2.
     69 def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>;
     70 
     71 // Crypto FP/SIMD micro-ops only on port 1.
     72 def VulcanF1 : ProcResGroup<[VulcanP1]>;
     73 
     74 // FP/SIMD micro-ops on ports 0 and 1.
     75 def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>;
     76 
     77 // Store data micro-ops only on port 3.
     78 def VulcanSD : ProcResGroup<[VulcanP3]>;
     79 
     80 // Load/store micro-ops on ports 4 and 5.
     81 def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>;
     82 
     83 // 60 entry unified scheduler.
     84 def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2,
     85                               VulcanP3, VulcanP4, VulcanP5]> {
     86   let BufferSize=60;
     87 }
     88 
     89 // Define commonly used write types for InstRW specializations.
     90 // All definitions follow the format: VulcanWrite_<NumCycles>Cyc_<Resources>.
     91 
     92 // 3 cycles on I1.
     93 def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 3; }
     94 
     95 // 4 cycles on I1.
     96 def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 4; }
     97 
     98 // 1 cycle on I0, I1, or I2.
     99 def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> { let Latency = 1; }
    100 
    101 // 5 cycles on F1.
    102 def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 5; }
    103 
    104 // 7 cycles on F1.
    105 def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 7; }
    106 
    107 // 4 cycles on F0 or F1.
    108 def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 4; }
    109 
    110 // 5 cycles on F0 or F1.
    111 def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 5; }
    112 
    113 // 6 cycles on F0 or F1.
    114 def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 6; }
    115 
    116 // 7 cycles on F0 or F1.
    117 def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 7; }
    118 
    119 // 8 cycles on F0 or F1.
    120 def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 8; }
    121 
    122 // 16 cycles on F0 or F1.
    123 def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> {
    124   let Latency = 16;
    125   let ResourceCycles = [8];
    126 }
    127 
    128 // 23 cycles on F0 or F1.
    129 def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> {
    130   let Latency = 23;
    131   let ResourceCycles = [11];
    132 }
    133 
    134 // 1 cycle on LS0 or LS1.
    135 def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 1; }
    136 
    137 // 4 cycles on LS0 or LS1.
    138 def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 4; }
    139 
    140 // 5 cycles on LS0 or LS1.
    141 def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 5; }
    142 
    143 // 6 cycles on LS0 or LS1.
    144 def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 6; }
    145 
    146 // 5 cycles on LS0 or LS1 and I0, I1, or I2.
    147 def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> {
    148   let Latency = 5;
    149   let NumMicroOps = 2;
    150 }
    151 
    152 // 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
    153 def VulcanWrite_6Cyc_LS01_I012_I012 : 
    154   SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> {
    155   let Latency = 6;
    156   let NumMicroOps = 3;
    157 }
    158 
    159 // 1 cycle on LS0 or LS1 and F0 or F1.
    160 def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
    161   let Latency = 1;
    162   let NumMicroOps = 2;
    163 }
    164 
    165 // 5 cycles on LS0 or LS1 and F0 or F1.
    166 def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
    167   let Latency = 5;
    168   let NumMicroOps = 2;
    169 }
    170 
    171 // 6 cycles on LS0 or LS1 and F0 or F1.
    172 def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
    173   let Latency = 6;
    174   let NumMicroOps = 2;
    175 }
    176 
    177 // 7 cycles on LS0 or LS1 and F0 or F1.
    178 def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
    179   let Latency = 7;
    180   let NumMicroOps = 2;
    181 }
    182 
    183 // 8 cycles on LS0 or LS1 and F0 or F1.
    184 def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
    185   let Latency = 8;
    186   let NumMicroOps = 2;
    187 }
    188 
    189 // Define commonly used read types.
    190 
    191 // No forwarding is provided for these types.
    192 def : ReadAdvance<ReadI,       0>;
    193 def : ReadAdvance<ReadISReg,   0>;
    194 def : ReadAdvance<ReadIEReg,   0>;
    195 def : ReadAdvance<ReadIM,      0>;
    196 def : ReadAdvance<ReadIMA,     0>;
    197 def : ReadAdvance<ReadID,      0>;
    198 def : ReadAdvance<ReadExtrHi,  0>;
    199 def : ReadAdvance<ReadAdrBase, 0>;
    200 def : ReadAdvance<ReadVLD,     0>;
    201 
    202 }
    203 
    204 
    205 //===----------------------------------------------------------------------===//
    206 // 3. Instruction Tables.
    207 
    208 let SchedModel = VulcanModel in {
    209 
    210 //---
    211 // 3.1 Branch Instructions
    212 //---
    213 
    214 // Branch, immed
    215 // Branch and link, immed
    216 // Compare and branch
    217 def : WriteRes<WriteBr,      [VulcanI2]> { let Latency = 1; }
    218 
    219 def : WriteRes<WriteSys,     []> { let Latency = 1; }
    220 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
    221 def : WriteRes<WriteHint,    []> { let Latency = 1; }
    222 
    223 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
    224 
    225 // Branch, register
    226 // Branch and link, register != LR
    227 // Branch and link, register = LR
    228 def : WriteRes<WriteBrReg,   [VulcanI2]> { let Latency = 1; }
    229 
    230 //---
    231 // 3.2 Arithmetic and Logical Instructions
    232 // 3.3 Move and Shift Instructions
    233 //---
    234 
    235 // ALU, basic
    236 // Conditional compare
    237 // Conditional select
    238 // Address generation
    239 def : WriteRes<WriteI,       [VulcanI012]> { let Latency = 1; }
    240 def : InstRW<[WriteI], (instrs COPY)>;
    241 
    242 // ALU, extend and/or shift
    243 def : WriteRes<WriteISReg,   [VulcanI012]> {
    244   let Latency = 2;
    245   let ResourceCycles = [2];
    246 }
    247 
    248 def : WriteRes<WriteIEReg,   [VulcanI012]> {
    249   let Latency = 2;
    250   let ResourceCycles = [2];
    251 }
    252 
    253 // Move immed
    254 def : WriteRes<WriteImm,     [VulcanI012]> { let Latency = 1; }
    255 
    256 // Variable shift
    257 def : WriteRes<WriteIS,      [VulcanI012]> { let Latency = 1; }
    258 
    259 //---
    260 // 3.4 Divide and Multiply Instructions
    261 //---
    262 
    263 // Divide, W-form
    264 // Latency range of 13-23.  Take the average.
    265 def : WriteRes<WriteID32,    [VulcanI1]> {
    266   let Latency = 18;
    267   let ResourceCycles = [18];
    268 }
    269 
    270 // Divide, X-form
    271 // Latency range of 13-39.  Take the average.
    272 def : WriteRes<WriteID64,    [VulcanI1]> {
    273   let Latency = 26;
    274   let ResourceCycles = [26];
    275 }
    276 
    277 // Multiply accumulate, W-form
    278 def : WriteRes<WriteIM32,    [VulcanI012]> { let Latency = 5; }
    279 // NOTE(review): port 1 is described as the only integer mul/div port (VulcanI1), but WriteIM32/WriteIM64 use VulcanI012 -- confirm.
    280 // Multiply accumulate, X-form
    281 def : WriteRes<WriteIM64,    [VulcanI012]> { let Latency = 5; }
    282 
    283 // Bitfield extract, two reg
    284 def : WriteRes<WriteExtr,    [VulcanI012]> { let Latency = 1; }
    285 
    286 // Bitfield move, basic
    287 // Bitfield move, insert
    288 // NOTE: Handled by WriteIS.
    289 
    290 // Count leading
    291 def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
    292                                                "^CLZ(W|X)r$")>;
    293 
    294 // Reverse bits/bytes
    295 // NOTE: Handled by WriteI.
    296 
    297 //---
    298 // 3.6 Load Instructions 
    299 // 3.10 FP Load Instructions
    300 //---
    301 
    302 // Load register, literal
    303 // Load register, unscaled immed
    304 // Load register, immed unprivileged
    305 // Load register, unsigned immed
    306 def : WriteRes<WriteLD,      [VulcanLS01]> { let Latency = 4; }
    307 
    308 // Load register, immed post-index
    309 // NOTE: Handled by WriteLD, WriteI.
    310 // Load register, immed pre-index
    311 // NOTE: Handled by WriteLD, WriteAdr.
    312 def : WriteRes<WriteAdr,     [VulcanI012]> { let Latency = 1; }
    313 
    314 // Load register offset, basic
    315 // Load register, register offset, scale by 4/8
    316 // Load register, register offset, scale by 2
    317 // Load register offset, extend
    318 // Load register, register offset, extend, scale by 4/8
    319 // Load register, register offset, extend, scale by 2
    320 def VulcanWriteLDIdx : SchedWriteVariant<[
    321   SchedVar<ScaledIdxPred, [VulcanWrite_6Cyc_LS01_I012_I012]>,
    322   SchedVar<NoSchedPred,   [VulcanWrite_5Cyc_LS01_I012]>]>;
    323 def : SchedAlias<WriteLDIdx, VulcanWriteLDIdx>;
    324 
    325 def VulcanReadAdrBase : SchedReadVariant<[  // Variant read; aliased to ReadAdrBase below.
    326   SchedVar<ScaledIdxPred, [ReadDefault]>,   // ScaledIdxPred case resolves to ReadDefault.
    327   SchedVar<NoSchedPred,   [ReadDefault]>]>; // Fallback is also ReadDefault, so both cases are identical.
    328 def : SchedAlias<ReadAdrBase, VulcanReadAdrBase>;
    329 
    330 // Load pair, immed offset, normal
    331 // Load pair, immed offset, signed words, base != SP
    332 // Load pair, immed offset signed words, base = SP
    333 // LDP only breaks into *one* LS micro-op.  Thus
    334 // the resources are handled by WriteLD.
    335 def : WriteRes<WriteLDHi,    []> {
    336   let Latency = 5;
    337 }
    338 
    339 // Load pair, immed pre-index, normal
    340 // Load pair, immed pre-index, signed words
    341 // Load pair, immed post-index, normal
    342 // Load pair, immed post-index, signed words
    343 // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
    344 
    345 //--
    346 // 3.7 Store Instructions 
    347 // 3.11 FP Store Instructions
    348 //--
    349 
    350 // Store register, unscaled immed
    351 // Store register, immed unprivileged
    352 // Store register, unsigned immed
    353 def : WriteRes<WriteST,      [VulcanLS01, VulcanSD]> {
    354   let Latency = 1;
    355   let NumMicroOps = 2;
    356 }
    357 
    358 // Store register, immed post-index
    359 // NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
    360 
    361 // Store register, immed pre-index
    362 // NOTE: Handled by WriteAdr, WriteST
    363 
    364 // Store register, register offset, basic
    365 // Store register, register offset, scaled by 4/8
    366 // Store register, register offset, scaled by 2
    367 // Store register, register offset, extend
    368 // Store register, register offset, extend, scale by 4/8
    369 // Store register, register offset, extend, scale by 1
    370 def : WriteRes<WriteSTIdx, [VulcanLS01, VulcanSD, VulcanI012]> {
    371   let Latency = 1;
    372   let NumMicroOps = 3;
    373 }
    374 
    375 // Store pair, immed offset, W-form
    376 // Store pair, immed offset, X-form
    377 def : WriteRes<WriteSTP,     [VulcanLS01, VulcanSD]> {
    378   let Latency = 1;
    379   let NumMicroOps = 2;
    380 }
    381 
    382 // Store pair, immed post-index, W-form
    383 // Store pair, immed post-index, X-form
    384 // Store pair, immed pre-index, W-form
    385 // Store pair, immed pre-index, X-form
    386 // NOTE: Handled by WriteAdr, WriteSTP.
    387 
    388 //---
    389 // 3.8 FP Data Processing Instructions
    390 //---
    391 
    392 // FP absolute value
    393 // FP min/max
    394 // FP negate
    395 def : WriteRes<WriteF,       [VulcanF01]> { let Latency = 5; }
    396 
    397 // FP arithmetic (scalar register forms; ASIMD FADD/FADDP/FSUB are in 3.13)
    398 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
    399 
    400 // FP compare
    401 def : WriteRes<WriteFCmp,    [VulcanF01]> { let Latency = 5; }
    402 
    403 // FP divide, S-form
    404 // FP square root, S-form
    405 def : WriteRes<WriteFDiv,    [VulcanF01]> {
    406   let Latency = 16;
    407   let ResourceCycles = [8];
    408 }
    409 
    410 // FP divide, D-form
    411 // FP square root, D-form
    412 def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
    413 
    414 // FP multiply
    415 // FP multiply accumulate
    416 def : WriteRes<WriteFMul, [VulcanF01]> { let Latency = 6; }
    417 
    418 // FP round to integral
    419 def : InstRW<[VulcanWrite_7Cyc_F01],
    420             (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
    421 
    422 // FP select
    423 def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
    424 
    425 //---
    426 // 3.9 FP Miscellaneous Instructions
    427 //---
    428 
    429 // FP convert, from vec to vec reg
    430 // FP convert, from gen to vec reg
    431 // FP convert, from vec to gen reg
    432 def : WriteRes<WriteFCvt, [VulcanF01]> { let Latency = 7; }
    433 
    434 // FP move, immed
    435 // FP move, register
    436 def : WriteRes<WriteFImm, [VulcanF01]> { let Latency = 4; }
    437 
    438 // FP transfer, from gen to vec reg
    439 // FP transfer, from vec to gen reg
    440 def : WriteRes<WriteFCopy, [VulcanF01]> { let Latency = 4; }
    441 def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
    442 
    443 //---
    444 // 3.12 ASIMD Integer Instructions
    445 //---
    446 
    447 // ASIMD absolute diff, D-form
    448 // ASIMD absolute diff, Q-form
    449 // ASIMD absolute diff accum, D-form
    450 // ASIMD absolute diff accum, Q-form
    451 // ASIMD absolute diff accum long
    452 // ASIMD absolute diff long
    453 // ASIMD arith, basic
    454 // ASIMD arith, complex
    455 // ASIMD compare
    456 // ASIMD logical (AND, BIC, EOR)
    457 // ASIMD max/min, basic
    458 // ASIMD max/min, reduce, 4H/4S
    459 // ASIMD max/min, reduce, 8B/8H
    460 // ASIMD max/min, reduce, 16B
    461 // ASIMD multiply, D-form
    462 // ASIMD multiply, Q-form
    463 // ASIMD multiply accumulate long
    464 // ASIMD multiply accumulate saturating long
    465 // ASIMD multiply long
    466 // ASIMD pairwise add and accumulate
    467 // ASIMD shift accumulate
    468 // ASIMD shift by immed, basic
    469 // ASIMD shift by immed and insert, basic, D-form
    470 // ASIMD shift by immed and insert, basic, Q-form
    471 // ASIMD shift by immed, complex
    472 // ASIMD shift by register, basic, D-form
    473 // ASIMD shift by register, basic, Q-form
    474 // ASIMD shift by register, complex, D-form
    475 // ASIMD shift by register, complex, Q-form
    476 def : WriteRes<WriteV, [VulcanF01]> { let Latency = 7; }
    477 
    478 // ASIMD arith, reduce, 4H/4S
    479 // ASIMD arith, reduce, 8B/8H
    480 // ASIMD arith, reduce, 16B
    481 def : InstRW<[VulcanWrite_5Cyc_F01], 
    482             (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
    483 
    484 // ASIMD logical (MOV, MVN, ORN, ORR)
    485 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
    486 
    487 // ASIMD polynomial (8x8) multiply long
    488 def : InstRW<[VulcanWrite_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
    489 
    490 //---
    491 // 3.13 ASIMD Floating-point Instructions
    492 //---
    493 
    494 // ASIMD FP absolute value
    495 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FABSv")>;
    496 
    497 // ASIMD FP arith, normal, D-form
    498 // ASIMD FP arith, normal, Q-form
    499 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
    500 
    501 // ASIMD FP arith, pairwise, D-form
    502 // ASIMD FP arith, pairwise, Q-form
    503 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADDPv")>;
    504 
    505 // ASIMD FP compare, D-form
    506 // ASIMD FP compare, Q-form
    507 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
    508 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
    509                                                 "^FCMGTv", "^FCMLEv",
    510                                                 "^FCMLTv")>;
    511 
    512 // ASIMD FP convert, long
    513 // ASIMD FP convert, narrow
    514 // ASIMD FP convert, other, D-form
    515 // ASIMD FP convert, other, Q-form
    516 // NOTE: Handled by WriteV.
    517 
    518 // ASIMD FP divide, D-form, F32
    519 def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv2f32)>;
    520 
    521 // ASIMD FP divide, Q-form, F32
    522 def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv4f32)>;
    523 
    524 // ASIMD FP divide, Q-form, F64
    525 def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVv2f64)>;
    526 
    527 // ASIMD FP max/min, normal, D-form
    528 // ASIMD FP max/min, normal, Q-form
    529 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
    530                                                 "^FMINv", "^FMINNMv")>;
    531 
    532 // ASIMD FP max/min, pairwise, D-form
    533 // ASIMD FP max/min, pairwise, Q-form
    534 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
    535                                                 "^FMINPv", "^FMINNMPv")>;
    536 
    537 // ASIMD FP max/min, reduce
    538 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
    539                                                 "^FMINVv", "^FMINNMVv")>;
    540 
    541 // ASIMD FP multiply, D-form, FZ
    542 // ASIMD FP multiply, D-form, no FZ
    543 // ASIMD FP multiply, Q-form, FZ
    544 // ASIMD FP multiply, Q-form, no FZ
    545 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
    546 
    547 // ASIMD FP multiply accumulate, D-form, FZ
    548 // ASIMD FP multiply accumulate, D-form, no FZ
    549 // ASIMD FP multiply accumulate, Q-form, FZ
    550 // ASIMD FP multiply accumulate, Q-form, no FZ
    551 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
    552 
    553 // ASIMD FP negate
    554 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
    555 
    556 // ASIMD FP round, D-form
    557 // ASIMD FP round, Q-form
    558 // NOTE: Handled by WriteV.
    559 
    560 //--
    561 // 3.14 ASIMD Miscellaneous Instructions
    562 //--
    563 
    564 // ASIMD bit reverse
    565 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^RBITv")>;
    566 
    567 // ASIMD bitwise insert, D-form
    568 // ASIMD bitwise insert, Q-form
    569 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
    570 
    571 // ASIMD count, D-form
    572 // ASIMD count, Q-form
    573 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
    574 
    575 // ASIMD duplicate, gen reg
    576 // ASIMD duplicate, element
    577 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^DUPv")>;
    578 
    579 // ASIMD extract
    580 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^EXTv")>;
    581 
    582 // ASIMD extract narrow
    583 // ASIMD extract narrow, saturating
    584 // NOTE: Handled by WriteV.
    585 
    586 // ASIMD insert, element to element
    587 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
    588 
    589 // ASIMD move, integer immed (MOVIv* plus MOVID, which has no "v" suffix)
    590 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^MOVIv", "^MOVID$")>;
    591 
    592 // ASIMD move, FP immed
    593 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMOVv")>;
    594 
    595 // ASIMD reciprocal estimate, D-form
    596 // ASIMD reciprocal estimate, Q-form
    597 def : InstRW<[VulcanWrite_5Cyc_F01], 
    598             (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
    599                                    "^FRSQRTEv", "^URSQRTEv")>;
    600 
    601 // ASIMD reciprocal step, D-form, FZ
    602 // ASIMD reciprocal step, D-form, no FZ
    603 // ASIMD reciprocal step, Q-form, FZ
    604 // ASIMD reciprocal step, Q-form, no FZ
    605 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
    606 
    607 // ASIMD reverse
    608 def : InstRW<[VulcanWrite_5Cyc_F01], 
    609             (instregex "^REV16v", "^REV32v", "^REV64v")>;
    610 
    611 // ASIMD table lookup, D-form
    612 // ASIMD table lookup, Q-form
    613 def : InstRW<[VulcanWrite_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
    614 
    615 // ASIMD transfer, element to word or word (UMOV)
    616 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^UMOVv")>;
    617 
    618 // ASIMD transfer, element to gen reg (SMOV only; UMOV is matched just above)
    619 def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^SMOVv")>;
    620 
    621 // ASIMD transfer gen reg to element
    622 // NOTE: Handled by the "^INSv" InstRW under "ASIMD insert, element to element".
    623 
    624 // ASIMD transpose (this entry also covers the UZP1/UZP2 unzip forms)
    625 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
    626                                                 "^UZP1v", "^UZP2v")>;
    627 
    628 // ASIMD unzip/zip (ZIP1/ZIP2 here; UZP1/UZP2 are matched by the preceding entry)
    629 def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
    630 
    631 //--
    632 // 3.15 ASIMD Load Instructions 
    633 //--
    634 
    635 // ASIMD load, 1 element, multiple, 1 reg, D-form
    636 // ASIMD load, 1 element, multiple, 1 reg, Q-form
    637 def : InstRW<[VulcanWrite_4Cyc_LS01], 
    638             (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    639 def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr], 
    640             (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    641 
    642 // ASIMD load, 1 element, multiple, 2 reg, D-form
    643 // ASIMD load, 1 element, multiple, 2 reg, Q-form
    644 def : InstRW<[VulcanWrite_4Cyc_LS01], 
    645             (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    646 def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr], 
    647             (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    648 
    649 // ASIMD load, 1 element, multiple, 3 reg, D-form
    650 // ASIMD load, 1 element, multiple, 3 reg, Q-form
    651 def : InstRW<[VulcanWrite_5Cyc_LS01], 
    652             (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    653 def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr], 
    654             (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    655 
    656 // ASIMD load, 1 element, multiple, 4 reg, D-form
    657 // ASIMD load, 1 element, multiple, 4 reg, Q-form
    658 def : InstRW<[VulcanWrite_6Cyc_LS01], 
    659             (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    660 def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr], 
    661             (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    662 
    663 // ASIMD load, 1 element, one lane, B/H/S
    664 // ASIMD load, 1 element, one lane, D
    665 def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
    666 def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], 
    667             (instregex "^LD1i(8|16|32|64)_POST$")>;
    668 
    669 // ASIMD load, 1 element, all lanes, D-form, B/H/S
    670 // ASIMD load, 1 element, all lanes, D-form, D
    671 // ASIMD load, 1 element, all lanes, Q-form
    672 def : InstRW<[VulcanWrite_5Cyc_LS01_F01], 
    673             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    674 def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], 
    675             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    676 
    677 // ASIMD load, 2 element, multiple, D-form, B/H/S
    678 // ASIMD load, 2 element, multiple, Q-form, D
    679 def : InstRW<[VulcanWrite_5Cyc_LS01_F01], 
    680             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
    681 def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], 
    682             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
    683 
    684 // ASIMD load, 2 element, one lane, B/H
    685 // ASIMD load, 2 element, one lane, S
    686 // ASIMD load, 2 element, one lane, D
    687 def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
    688 def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], 
    689             (instregex "^LD2i(8|16|32|64)_POST$")>;
    690 
    691 // ASIMD load, 2 element, all lanes, D-form, B/H/S
    692 // ASIMD load, 2 element, all lanes, D-form, D
    693 // ASIMD load, 2 element, all lanes, Q-form
    694 def : InstRW<[VulcanWrite_5Cyc_LS01_F01], 
    695             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    696 def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], 
    697             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    698 
    699 // ASIMD load, 3 element, multiple, D-form, B/H/S
    700 // ASIMD load, 3 element, multiple, Q-form, B/H/S
    701 // ASIMD load, 3 element, multiple, Q-form, D
    702 def : InstRW<[VulcanWrite_8Cyc_LS01_F01], 
    703             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
    704 def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr], 
    705             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
    706 
    707 // ASIMD load, 3 element, one lane, B/H
    708 // ASIMD load, 3 element, one lane, S
    709 // ASIMD load, 3 element, one lane, D
    710 def : InstRW<[VulcanWrite_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
    711 def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr], 
    712             (instregex "^LD3i(8|16|32|64)_POST$")>;
    713 
    714 // ASIMD load, 3 element, all lanes, D-form, B/H/S
    715 // ASIMD load, 3 element, all lanes, D-form, D
    716 // ASIMD load, 3 element, all lanes, Q-form, B/H/S
    717 // ASIMD load, 3 element, all lanes, Q-form, D
    718 def : InstRW<[VulcanWrite_7Cyc_LS01_F01], 
    719             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    720 def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr], 
    721             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    722 
    723 // ASIMD load, 4 element, multiple, D-form, B/H/S
    724 // ASIMD load, 4 element, multiple, Q-form, B/H/S
    725 // ASIMD load, 4 element, multiple, Q-form, D
    726 def : InstRW<[VulcanWrite_8Cyc_LS01_F01], 
    727             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
    728 def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr], 
    729             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
    730 
    731 // ASIMD load, 4 element, one lane, B/H
    732 // ASIMD load, 4 element, one lane, S
    733 // ASIMD load, 4 element, one lane, D
    734 def : InstRW<[VulcanWrite_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
    735 def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], 
    736             (instregex "^LD4i(8|16|32|64)_POST$")>;
    737 
    738 // ASIMD load, 4 element, all lanes, D-form, B/H/S
    739 // ASIMD load, 4 element, all lanes, D-form, D
    740 // ASIMD load, 4 element, all lanes, Q-form, B/H/S
    741 // ASIMD load, 4 element, all lanes, Q-form, D
    742 def : InstRW<[VulcanWrite_6Cyc_LS01_F01], 
    743             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    744 def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], 
    745             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    746 
    747 //--
    748 // 3.16 ASIMD Store Instructions
    749 //--
    750 
    751 // ASIMD store, 1 element, multiple, 1 reg, D-form
    752 // ASIMD store, 1 element, multiple, 1 reg, Q-form
    753 def : InstRW<[VulcanWrite_1Cyc_LS01], 
    754             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    755 def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], 
    756             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    757 
    758 // ASIMD store, 1 element, multiple, 2 reg, D-form
    759 // ASIMD store, 1 element, multiple, 2 reg, Q-form
    760 def : InstRW<[VulcanWrite_1Cyc_LS01], 
    761             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    762 def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], 
    763             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    764 
    765 // ASIMD store, 1 element, multiple, 3 reg, D-form
    766 // ASIMD store, 1 element, multiple, 3 reg, Q-form
    767 def : InstRW<[VulcanWrite_1Cyc_LS01], 
    768             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    769 def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], 
    770             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    771 
    772 // ASIMD store, 1 element, multiple, 4 reg, D-form
    773 // ASIMD store, 1 element, multiple, 4 reg, Q-form
    774 def : InstRW<[VulcanWrite_1Cyc_LS01], 
    775             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
    776 def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], 
    777             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
    778 
    779 // ASIMD store, 1 element, one lane, B/H/S
    780 // ASIMD store, 1 element, one lane, D
    781 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], 
    782             (instregex "^ST1i(8|16|32|64)$")>;
    783 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    784             (instregex "^ST1i(8|16|32|64)_POST$")>;
    785 
    786 // ASIMD store, 2 element, multiple, D-form, B/H/S
    787 // ASIMD store, 2 element, multiple, Q-form, B/H/S
    788 // ASIMD store, 2 element, multiple, Q-form, D
    789 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], 
    790             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
    791 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    792             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
    793 
    794 // ASIMD store, 2 element, one lane, B/H/S
    795 // ASIMD store, 2 element, one lane, D
    796 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], 
    797             (instregex "^ST2i(8|16|32|64)$")>;
    798 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    799             (instregex "^ST2i(8|16|32|64)_POST$")>;
    800 
    801 // ASIMD store, 3 element, multiple, D-form, B/H/S
    802 // ASIMD store, 3 element, multiple, Q-form, B/H/S
    803 // ASIMD store, 3 element, multiple, Q-form, D
    804 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], 
    805             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
    806 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    807             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
    808 
    809 // ASIMD store, 3 element, one lane, B/H
    810 // ASIMD store, 3 element, one lane, S
    811 // ASIMD store, 3 element, one lane, D
    812 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
    813 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    814             (instregex "^ST3i(8|16|32|64)_POST$")>;
    815 
    816 // ASIMD store, 4 element, multiple, D-form, B/H/S
    817 // ASIMD store, 4 element, multiple, Q-form, B/H/S
    818 // ASIMD store, 4 element, multiple, Q-form, D
    819 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], 
    820             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
    821 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    822             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
    823 
    824 // ASIMD store, 4 element, one lane, B/H
    825 // ASIMD store, 4 element, one lane, S
    826 // ASIMD store, 4 element, one lane, D
    827 def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
    828 def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], 
    829             (instregex "^ST4i(8|16|32|64)_POST$")>;
    830 
    831 //--
    832 // 3.17 Cryptography Extensions
    833 //--
    834 
    835 // Crypto AES ops
    836 def : InstRW<[VulcanWrite_5Cyc_F1], (instregex "^AES")>;
    837 
    838 // Crypto polynomial (64x64) multiply long
    839 def : InstRW<[VulcanWrite_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
    840 
    841 // Crypto SHA1 xor ops
    842 // Crypto SHA1 schedule acceleration ops
    843 // Crypto SHA256 schedule acceleration op (1 u-op)
    844 // Crypto SHA256 schedule acceleration op (2 u-ops)
    845 // Crypto SHA256 hash acceleration ops
    846 def : InstRW<[VulcanWrite_7Cyc_F1], (instregex "^SHA")>;
    847 
    848 //--
    849 // 3.18 CRC
    850 //--
    851 
    852 // CRC checksum ops
    853 def : InstRW<[VulcanWrite_4Cyc_I1], (instregex "^CRC32")>;
    854 
    855 } // SchedModel = VulcanModel
    856