Home | History | Annotate | Download | only in X86
      1 //=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the machine model for Haswell to support instruction
     11 // scheduling and other instruction cost heuristics.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 def HaswellModel : SchedMachineModel {
     16   // x86 instructions are modeled as a single micro-op unless a write below
     17   // sets NumMicroOps, and HW can decode 4 instructions per cycle.
     18   let IssueWidth = 4;
     19   let MicroOpBufferSize = 192; // Based on the reorder buffer.
     20   let LoadLatency = 4; // Same 4 cycles as the WriteLoad latency below.
     21   let MispredictPenalty = 16;
     22 
     23   // Based on the LSD (loop-stream detector) queue size and benchmarking data.
     24   let LoopMicroOpBufferSize = 50;
     25 
     26   // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
     27   // the scheduler to assign a default model to unrecognized opcodes.
     28   let CompleteModel = 0;
     29 }
     30 
     31 let SchedModel = HaswellModel in {
     32 
     33 // Haswell can issue micro-ops to 8 different ports in one cycle.
     34 
     35 // Ports 0, 1, 5, and 6 handle all computation.
     36 // Port 4 gets the data half of stores. Store data can be available later than
     37 // the store address, but since we don't model the latency of stores, we can
     38 // ignore that.
     39 // Ports 2 and 3 are identical. They handle loads and the address half of
     40 // stores. Port 7 can handle address calculations.
     41 def HWPort0 : ProcResource<1>; // Computation.
     42 def HWPort1 : ProcResource<1>; // Computation.
     43 def HWPort2 : ProcResource<1>; // Loads and the address half of stores.
     44 def HWPort3 : ProcResource<1>; // Loads and the address half of stores.
     45 def HWPort4 : ProcResource<1>; // Data half of stores.
     46 def HWPort5 : ProcResource<1>; // Computation.
     47 def HWPort6 : ProcResource<1>; // Computation.
     48 def HWPort7 : ProcResource<1>; // Address calculations.
     49 
     50 // Groups for micro-ops that are able to issue on more than one port.
     51 def HWPort01 : ProcResGroup<[HWPort0, HWPort1]>;
     52 def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
     53 def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
     54 def HWPort04 : ProcResGroup<[HWPort0, HWPort4]>;
     55 def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
     56 def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
     57 def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
     58 def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>;
     59 def HWPort56 : ProcResGroup<[HWPort5, HWPort6]>;
     60 def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
     61 def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
     62 def HWPort0156 : ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
     63 
     64 // Unified scheduler with 60 entries, covering all eight ports.
     65 def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3,
     66                               HWPort4, HWPort5, HWPort6, HWPort7]> {
     67   let BufferSize = 60;
     68 }
     69 
     70 // Integer division issued on port 0.
     71 def HWDivider : ProcResource<1>;
     72 
     73 // Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
     74 // cycles after the memory operand.
     75 def : ReadAdvance<ReadAfterLd, 4>;
     76 
     77 // SchedWrites usually come in pairs: a register form and a form with a folded
     78 // load. A folded load is typically micro-fused, so the instruction only shows
     79 // up as two micro-ops once it is queued in the reservation station.
     80 // This multiclass emits the resource usage for both the register form and
     81 // the folded-load form of one SchedWrite.
     82 multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
     83                           ProcResourceKind ExePort, int Lat> {
     84   // Register form: a single cycle on ExePort with the given latency.
     85   def : WriteRes<SchedRW, [ExePort]> {
     86     let Latency = Lat;
     87   }
     88 
     89   // Folded-load form: one more cycle on port 2/3, and 4 more cycles of latency.
     90   def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
     91     let Latency = !add(Lat, 4);
     92   }
     93 }
     94 
     95 // A folded store needs a cycle on port 4 for the store data, but it does not
     96 // need an extra port 2/3 cycle to recompute the address.
     97 def : WriteRes<WriteRMW, [HWPort4]>;
     98 
     99 // Store_addr on 237.
    100 // Store_data on 4.
    101 def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
    102 def : WriteRes<WriteLoad,  [HWPort23]> { let Latency = 4; }
    103 def : WriteRes<WriteMove,  [HWPort0156]>;
    104 def : WriteRes<WriteZero,  []>;
    105 
    106 defm : HWWriteResPair<WriteALU,   HWPort0156, 1>;
    107 defm : HWWriteResPair<WriteIMul,  HWPort1,   3>;
    108 def  : WriteRes<WriteIMulH, []> { let Latency = 3; }
    109 defm : HWWriteResPair<WriteShift, HWPort06,  1>;
    110 defm : HWWriteResPair<WriteJump,  HWPort06,   1>;
    111 
    112 // This is for simple LEAs with one or two input operands.
    113 // The complex ones can only execute on port 1, and they require two cycles on
    114 // the port to read all inputs. We don't model that.
    115 def : WriteRes<WriteLEA, [HWPort15]>;
    116 
    117 // Rough model only: the real latency varies with the dividend.
    118 def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
    119   let ResourceCycles = [1, 10];
    120   let Latency = 25;
    121 }
    122 def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
    123   let ResourceCycles = [1, 1, 10];
    124   let Latency = 29;
    125 }
    126 
    127 // Scalar and vector floating point.
    128 defm : HWWriteResPair<WriteFAdd,   HWPort1, 3>;
    129 defm : HWWriteResPair<WriteFMul,   HWPort0, 5>;
    130 defm : HWWriteResPair<WriteFDiv,   HWPort0, 12>; // 10-14 cycles.
    131 defm : HWWriteResPair<WriteFRcp,   HWPort0, 5>;
    132 defm : HWWriteResPair<WriteFRsqrt, HWPort0, 5>;
    133 defm : HWWriteResPair<WriteFSqrt,  HWPort0, 15>;
    134 defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
    135 defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
    136 defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
    137 defm : HWWriteResPair<WriteFShuffle,  HWPort5,  1>;
    138 defm : HWWriteResPair<WriteFBlend,  HWPort015,  1>;
    139 defm : HWWriteResPair<WriteFShuffle256,  HWPort5,  3>;
    140 
    141 def : WriteRes<WriteFVarBlend, [HWPort5]> {
    142   let Latency = 2;
    143   let ResourceCycles = [2];
    144 }
    145 def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
    146   let Latency = 6;
    147   let ResourceCycles = [2, 1];
    148 }
    149 
    150 // Vector integer operations.
    151 defm : HWWriteResPair<WriteVecShift, HWPort0,  1>;
    152 defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
    153 defm : HWWriteResPair<WriteVecALU,   HWPort15,  1>;
    154 defm : HWWriteResPair<WriteVecIMul,  HWPort0,   5>;
    155 defm : HWWriteResPair<WriteShuffle,  HWPort5,  1>;
    156 defm : HWWriteResPair<WriteBlend,  HWPort15,  1>;
    157 defm : HWWriteResPair<WriteShuffle256,  HWPort5,  3>;
    158 
    159 def : WriteRes<WriteVarBlend, [HWPort5]> {
    160   let Latency = 2;
    161   let ResourceCycles = [2];
    162 }
    163 def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
    164   let Latency = 6;
    165   let ResourceCycles = [2, 1];
    166 }
    167 
    168 def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
    169   let Latency = 2;
    170   let ResourceCycles = [2, 1];
    171 }
    172 def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
    173   let Latency = 6;
    174   let ResourceCycles = [2, 1, 1];
    175 }
    176 
    177 def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
    178   let Latency = 6;
    179   let ResourceCycles = [1, 2];
    180 }
    181 def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
    182   let Latency = 6;
    183   let ResourceCycles = [1, 1, 2];
    184 }
    185 
    186 // String instructions.
    187 // Packed Compare Implicit Length Strings, Return Mask
    188 def : WriteRes<WritePCmpIStrM, [HWPort0]> {
    189   let Latency = 10;
    190   let ResourceCycles = [3];
    191 }
    192 def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
    193   let Latency = 10;
    194   let ResourceCycles = [3, 1];
    195 }
    196 
    197 // Packed Compare Explicit Length Strings, Return Mask
    198 def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
    199   let Latency = 10;
    200   let ResourceCycles = [3, 2, 4];
    201 }
    202 def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
    203   let Latency = 10;
    204   let ResourceCycles = [6, 2, 1];
    205 }
    206 
    207 // Packed Compare Implicit Length Strings, Return Index
    208 def : WriteRes<WritePCmpIStrI, [HWPort0]> {
    209   let Latency = 11;
    210   let ResourceCycles = [3];
    211 }
    212 def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
    213   let Latency = 11;
    214   let ResourceCycles = [3, 1];
    215 }
    216 
    217 // Packed Compare Explicit Length Strings, Return Index
    218 def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
    219   let Latency = 11;
    220   let ResourceCycles = [6, 2];
    221 }
    222 def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
    223   let Latency = 11;
    224   let ResourceCycles = [3, 2, 2, 1];
    225 }
    226 
    227 // AES Instructions.
    228 def : WriteRes<WriteAESDecEnc, [HWPort5]> {
    229   let Latency = 7;
    230   let ResourceCycles = [1];
    231 }
    232 def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
    233   let Latency = 7;
    234   let ResourceCycles = [1, 1];
    235 }
    236 
    237 def : WriteRes<WriteAESIMC, [HWPort5]> {
    238   let Latency = 14;
    239   let ResourceCycles = [2];
    240 }
    241 def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
    242   let Latency = 14;
    243   let ResourceCycles = [2, 1];
    244 }
    245 
    246 def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
    247   let Latency = 10;
    248   let ResourceCycles = [2, 8];
    249 }
    250 def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
    251   let Latency = 10;
    252   let ResourceCycles = [2, 7, 1];
    253 }
    254 
    255 // Carry-less multiplication instructions.
    256 def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
    257   let Latency = 7;
    258   let ResourceCycles = [2, 1];
    259 }
    260 def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
    261   let Latency = 7;
    262   let ResourceCycles = [2, 1, 1];
    263 }
    264 
    265 def : WriteRes<WriteSystem,     [HWPort0156]> { let Latency = 100; }
    266 def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
    267 def : WriteRes<WriteFence,  [HWPort23, HWPort4]>;
    268 def : WriteRes<WriteNop, []>;
    269 
    270 //================ Exceptions ================//
    271 
    272 //-- Specific Scheduling Models --//
    273 
    274 // Starting with P0.
    275 def WriteP0 : SchedWriteRes<[HWPort0]>;
    276 
    277 def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
    278   let Latency = 4;
    279   let NumMicroOps = 2;
    280   let ResourceCycles = [1, 1];
    281 }
    282 
    283 def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
    284   let Latency = 8;
    285   let NumMicroOps = 3;
    286   let ResourceCycles = [1, 1, 1];
    287 }
    288 
    289 def WriteP01 : SchedWriteRes<[HWPort01]>;
    290 
    291 def Write2P01 : SchedWriteRes<[HWPort01]> {
    292   let NumMicroOps = 2;
    293 }
    294 def Write3P01 : SchedWriteRes<[HWPort01]> {
    295   let NumMicroOps = 3;
    296 }
    297 
    298 def WriteP015 : SchedWriteRes<[HWPort015]>;
    299 
    300 def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> {
    301   let NumMicroOps = 2;
    302 }
    303 def WriteP06 : SchedWriteRes<[HWPort06]>;
    304 
    305 def Write2P06 : SchedWriteRes<[HWPort06]> {
    306   let Latency = 1;
    307   let NumMicroOps = 2;
    308   let ResourceCycles = [2];
    309 }
    310 
    311 def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
    312   let Latency = 2;
    313   let NumMicroOps = 3;
    314   let ResourceCycles = [3];
    315 }
    316 
    317 def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
    318   let NumMicroOps = 2;
    319 }
    320 
    321 def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
    322   let NumMicroOps = 3;
    323   let ResourceCycles = [2, 1];
    324 }
    325 
    326 def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> { // 2 cycles on P0156.
    327   let Latency = 2;
    328   let ResourceCycles = [2]; // NOTE(review): NumMicroOps is not set here, unlike Write2P06 - confirm.
    329 }
    330 def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> { // Folded-load form.
    331   let Latency = 6; // Register-form latency of 2 plus 4.
    332   let ResourceCycles = [2, 1]; // NOTE(review): NumMicroOps is not set here, unlike Write2P0156_P23 - confirm.
    333 }
    334 
    335 def Write5P0156 : SchedWriteRes<[HWPort0156]> {
    336   let NumMicroOps = 5;
    337   let ResourceCycles = [5];
    338 }
    339 
    340 def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
    341   let Latency = 1;
    342   let ResourceCycles = [1, 2, 1];
    343 }
    344 
    345 def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
    346   let Latency = 1;
    347   let ResourceCycles = [2, 2, 1];
    348 }
    349 
    350 def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
    351   let Latency = 1;
    352   let ResourceCycles = [3, 2, 1];
    353 }
    354 
    355 // Starting with P1.
    356 def WriteP1 : SchedWriteRes<[HWPort1]>;
    357 
    358 def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
    359   let NumMicroOps = 2;
    360 }
    361 def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> {
    362   let Latency = 3;
    363 }
    364 def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> {
    365   let Latency = 7;
    366 }
    367 
    368 def Write2P1 : SchedWriteRes<[HWPort1]> {
    369   let NumMicroOps = 2;
    370   let ResourceCycles = [2];
    371 }
    372 def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
    373   let NumMicroOps = 3;
    374   let ResourceCycles = [2, 1];
    375 }
    376 def WriteP15 : SchedWriteRes<[HWPort15]>;
    377 def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> { // Folded-load form of WriteP15.
    378   let Latency = 5; // 4-cycle load + 1-cycle op, matching WriteP5Ld.
    379 }
    380 
    381 def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> {
    382   let Latency = 4;
    383   let NumMicroOps = 2;
    384   let ResourceCycles = [1, 1];
    385 }
    386 
    387 def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
    388   let Latency = 8;
    389   let NumMicroOps = 3;
    390   let ResourceCycles = [1, 1, 1];
    391 }
    392 
    393 def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> {
    394   let Latency = 6;
    395   let NumMicroOps = 2;
    396   let ResourceCycles = [1, 1];
    397 }
    398 
    399 def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
    400   let Latency = 10;
    401   let NumMicroOps = 3;
    402   let ResourceCycles = [1, 1, 1];
    403 }
    404 
    405 // Starting with P2.
    406 def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
    407   let Latency = 1;
    408   let ResourceCycles = [2, 1];
    409 }
    410 
    411 // Starting with P5.
    412 def WriteP5 : SchedWriteRes<[HWPort5]>;
    413 def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> {
    414   let Latency = 5;
    415   let NumMicroOps = 2;
    416   let ResourceCycles = [1, 1];
    417 }
    418 
    419 // Notation:
    420 // - r: register.
    421 // - mm: 64 bit mmx register.
    422 // - x = 128 bit xmm register.
    423 // - (x)mm = mmx or xmm register.
    424 // - y = 256 bit ymm register.
    425 // - v = any vector register.
    426 // - m = memory.
    427 
    428 //=== Integer Instructions ===//
    429 //-- Move instructions --//
    430 
    431 // MOV.
    432 // r16,m.
    433 def : InstRW<[WriteALULd], (instregex "MOV16rm")>;
    434 
    435 // MOVSX, MOVZX.
    436 // r,m.
    437 def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
    438 
    439 // CMOVcc.
    440 // r,r.
    441 def : InstRW<[Write2P0156_Lat2],
    442       (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
    443 // r,m.
    444 def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
    445       (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
    446 
    447 // XCHG.
    448 // r,r.
    449 def WriteXCHG : SchedWriteRes<[HWPort0156]> { // Register-register exchange.
    450   let Latency = 2;
    451   let ResourceCycles = [3]; // NOTE(review): NumMicroOps is not set here, unlike Write5P0156 - confirm.
    452 }
    453 
    454 def : InstRW<[WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
    455 
    456 // r,m.
    457 def WriteXCHGrm : SchedWriteRes<[]> {
    458   let Latency = 21;
    459   let NumMicroOps = 8;
    460 }
    461 def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;
    462 
    463 // XLAT.
    464 def WriteXLAT : SchedWriteRes<[]> {
    465   let Latency = 7;
    466   let NumMicroOps = 3;
    467 }
    468 def : InstRW<[WriteXLAT], (instregex "XLAT")>;
    469 
    470 // PUSH.
    471 // m.
    472 def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;
    473 
    474 // PUSHF.
    475 def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
    476   let NumMicroOps = 4;
    477 }
    478 def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;
    479 
    480 // PUSHA.
    481 def WritePushA : SchedWriteRes<[]> {
    482   let NumMicroOps = 19;
    483 }
    484 def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;
    485 
    486 // POP.
    487 // m.
    488 def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;
    489 
    490 // POPF.
    491 def WritePopF : SchedWriteRes<[]> {
    492   let NumMicroOps = 9;
    493 }
    494 def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;
    495 
    496 // POPA.
    497 def WritePopA : SchedWriteRes<[]> {
    498   let NumMicroOps = 18;
    499 }
    500 def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;
    501 
    502 // LAHF SAHF.
    503 def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;
    504 
    505 // BSWAP.
    506 // r32.
    507 def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
    508 def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;
    509 
    510 // r64.
    511 def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
    512   let NumMicroOps = 2;
    513 }
    514 def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;
    515 
    516 // MOVBE.
    517 // r16,m16 / r64,m64.
    518 def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;
    519 
    520 // r32, m32.
    521 def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
    522   let NumMicroOps = 2;
    523 }
    524 def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;
    525 
    526 // m16,r16.
    527 def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
    528   let NumMicroOps = 3;
    529 }
    530 def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;
    531 
    532 // m32,r32.
    533 def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
    534   let NumMicroOps = 3;
    535 }
    536 def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;
    537 
    538 // m64,r64.
    539 def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
    540   let NumMicroOps = 4;
    541 }
    542 def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;
    543 
    544 //-- Arithmetic instructions --//
    545 
    546 // ADD SUB.
    547 // m,r/i.
    548 def : InstRW<[Write2P0156_2P237_P4],
    549               (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
    550               "(ADD|SUB)(8|16|32|64)mi8", "(ADD|SUB)64mi32")>;
    551 
    552 // ADC SBB.
    553 // r,r/i.
    554 def : InstRW<[Write2P0156_Lat2], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
    555                            "(ADC|SBB)(16|32|64)ri8",
    556                            "(ADC|SBB)64ri32",
    557                            "(ADC|SBB)(8|16|32|64)rr_REV")>;
    558 
    559 // r,m.
    560 def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], (instregex "(ADC|SBB)(8|16|32|64)rm")>;
    561 
    562 // m,r/i.
    563 def : InstRW<[Write3P0156_2P237_P4],
    564              (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
    565               "(ADC|SBB)(16|32|64)mi8",
    566               "(ADC|SBB)64mi32")>;
    567 
    568 // INC DEC NOT NEG.
    569 // m.
    570 def : InstRW<[WriteP0156_2P237_P4],
    571              (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
    572               "(INC|DEC)64(16|32)m")>;
    573 
    574 // MUL IMUL.
    575 // r16.
    576 def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
    577   let Latency = 4;
    578   let NumMicroOps = 4;
    579 }
    580 def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;
    581 
    582 // m16.
    583 def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
    584   let Latency = 8;
    585   let NumMicroOps = 5;
    586 }
    587 def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;
    588 
    589 // r32.
    590 def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
    591   let Latency = 4;
    592   let NumMicroOps = 3;
    593 }
    594 def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;
    595 
    596 // m32.
    597 def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
    598   let Latency = 8;
    599   let NumMicroOps = 4;
    600 }
    601 def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;
    602 
    603 // r64.
    604 def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
    605   let Latency = 3;
    606   let NumMicroOps = 2;
    607 }
    608 def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;
    609 
    610 // m64.
    611 def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
    612   let Latency = 7;
    613   let NumMicroOps = 3;
    614 }
    615 def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;
    616 
    617 // r16,r16,i.
    618 def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
    619   let Latency = 4;
    620   let NumMicroOps = 2;
    621 }
    622 def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;
    623 
    624 // r16,m16,i.
    625 def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
    626   let Latency = 8;
    627   let NumMicroOps = 3;
    628 }
    629 def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;
    630 
    631 // MULX.
    632 // r32,r32,r32.
    633 def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
    634   let Latency = 4;
    635   let NumMicroOps = 3;
    636   let ResourceCycles = [1, 2];
    637 }
    638 def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;
    639 
    640 // r32,r32,m32.
    641 def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
    642   let Latency = 8;
    643   let NumMicroOps = 4;
    644   let ResourceCycles = [1, 2, 1];
    645 }
    646 def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;
    647 
    648 // r64,r64,r64.
    649 def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
    650   let Latency = 4;
    651   let NumMicroOps = 2;
    652 }
    653 def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;
    654 
    655 // r64,r64,m64.
    656 def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
    657   let Latency = 8;
    658   let NumMicroOps = 3;
    659 }
    660 def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;
    661 
    662 // DIV.
    663 // r8.
    664 def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    665   let Latency = 22;
    666   let NumMicroOps = 9;
    667 }
    668 def : InstRW<[WriteDiv8], (instregex "DIV8r")>;
    669 
    670 // r16.
    671 def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    672   let Latency = 23;
    673   let NumMicroOps = 10;
    674 }
    675 def : InstRW<[WriteDiv16], (instregex "DIV16r")>;
    676 
    677 // r32.
    678 def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    679   let Latency = 22;
    680   let NumMicroOps = 10;
    681 }
    682 def : InstRW<[WriteDiv32], (instregex "DIV32r")>;
    683 
    684 // r64.
    685 def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    686   let Latency = 32;
    687   let NumMicroOps = 36;
    688 }
    689 def : InstRW<[WriteDiv64], (instregex "DIV64r")>;
    690 
    691 // IDIV.
    692 // r8.
    693 def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    694   let Latency = 23;
    695   let NumMicroOps = 9;
    696 }
    697 def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;
    698 
    699 // r16.
    700 def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    701   let Latency = 23;
    702   let NumMicroOps = 10;
    703 }
    704 def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;
    705 
    706 // r32.
    707 def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    708   let Latency = 22;
    709   let NumMicroOps = 9;
    710 }
    711 def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;
    712 
    713 // r64.
    714 def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
    715   let Latency = 39;
    716   let NumMicroOps = 59;
    717 }
    718 def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
    719 
    720 //-- Logic instructions --//
    721 
    722 // AND OR XOR.
    723 // m,r/i.
    724 def : InstRW<[Write2P0156_2P237_P4],
    725              (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
    726               "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
    727 
    728 // SHR SHL SAR.
    729 // m,i.
    730 def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
    731   let NumMicroOps = 4;
    732   let ResourceCycles = [2, 1, 1];
    733 }
    734 def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
    735 
    736 // r,cl.
    737 def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;
    738 
    739 // m,cl.
    740 def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
    741   let NumMicroOps = 6;
    742   let ResourceCycles = [3, 2, 1];
    743 }
    744 def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;
    745 
    746 // ROR ROL.
    747 // r,1.
    748 def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;
    749 
    750 // m,i.
    751 def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
    752   let NumMicroOps = 5;
    753   let ResourceCycles = [2, 2, 1];
    754 }
    755 def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;
    756 
    757 // r,cl.
    758 def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;
    759 
    760 // m,cl.
    761 def WriteRotateRMWCL : SchedWriteRes<[]> {
    762   let NumMicroOps = 6;
    763 }
    764 def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;
    765 
    766 // RCR RCL.
    767 // r,1.
    768 def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
    769   let Latency = 2;
    770   let NumMicroOps = 3;
    771   let ResourceCycles = [2, 1];
    772 }
    773 def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;
    774 
    775 // m,1.
    776 def WriteRCm1 : SchedWriteRes<[]> {
    777   let NumMicroOps = 6;
    778 }
    779 def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;
    780 
    781 // r,i / r,cl.
    782 def WriteRCri : SchedWriteRes<[HWPort0156]> {
    783   let Latency = 6;
    784   let NumMicroOps = 8;
    785 }
    786 def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;
    787 
    788 // m,i / m,cl.
    789 def WriteRCmi : SchedWriteRes<[]> {
    790   let NumMicroOps = 11;
    791 }
    792 def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;
    793 
    794 // SHRD SHLD.
    795 // r,r,i.
    796 def WriteShDrr : SchedWriteRes<[HWPort1]> {
    797   let Latency = 3;
    798 }
    799 def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;
    800 
    801 // m,r,i.
    802 def WriteShDmr : SchedWriteRes<[]> {
    803   let NumMicroOps = 5;
    804 }
    805 def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;
    806 
    807 // SHLD r,r,cl.
    808 def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
    809   let Latency = 3;
    810   let NumMicroOps = 4;
    811 }
    812 def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;
    813 
    814 // SHRD r,r,cl.
    815 def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
    816   let Latency = 4;
    817   let NumMicroOps = 4;
    818 }
    819 def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;
    820 
    821 // m,r,cl.
    822 def WriteShDmrCL : SchedWriteRes<[]> {
    823   let NumMicroOps = 7;
    824 }
    825 def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;
    826 
    827 // BT.
    828 // r,r/i.
    829 def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
    830 
    831 // m,r.
    832 def WriteBTmr : SchedWriteRes<[]> {
    833   let NumMicroOps = 10;
    834 }
    835 def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;
    836 
    837 // m,i.
    838 def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
    839 
    840 // BTR BTS BTC.
    841 // r,r/i.
    842 def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
    843 
    844 // m,r.
    845 def WriteBTRSCmr : SchedWriteRes<[]> {
    846   let NumMicroOps = 11;
    847 }
    848 def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
    849 
    850 // m,i.
    851 def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;
    852 
    853 // BSF BSR.
    854 // r,r.
    855 def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
    856 // r,m.
    857 def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;
    858 
    859 // SETcc.
    860 // r.
    861 def : InstRW<[WriteShift],
    862              (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
    863 // m.
    864 def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
    865   let NumMicroOps = 3;
    866 }
    867 def : InstRW<[WriteSetCCm],
    868              (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
    869 
    870 // CLD STD.
    871 def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
    872   let NumMicroOps = 3;
    873 }
    874 def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;
    875 
    876 // LZCNT TZCNT. Only the L/TZ prefix is grouped so both opcode families match.
    877 // r,r.
    878 def : InstRW<[WriteP1_Lat3], (instregex "(L|TZ)CNT(16|32|64)rr")>;
    879 // r,m.
    880 def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|TZ)CNT(16|32|64)rm")>;
    881 
    882 // ANDN (32/64-bit forms only).
    883 // r,r.
    884 def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
    885 // r,m.
    886 def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;
    887 
    888 // BLSI BLSMSK BLSR (same write types as ANDN).
    889 // r,r.
    890 def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
    891 // r,m.
    892 def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
    893 
    894 // BEXTR.
    895 // r,r,r.
    896 def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
    897 // r,m,r.
    898 def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;
    899 
    900 // BZHI (same write types as ANDN).
    901 // r,r,r.
    902 def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
    903 // r,m,r.
    904 def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;
    905 
    906 // PDEP PEXT (same write types as BSF/BSR).
    907 // r,r,r.
    908 def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
    909 // r,m,r.
    910 def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
    911 
    912 //-- Control transfer instructions --//
    913 
    914 // J(E|R)CXZ: 2 micro-ops using HWPort0156 and HWPort6.
    915 def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> {
    916   let NumMicroOps = 2;
    917 }
    918 def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;
    919 
    920 // LOOP: no port resources are listed, only the micro-op count.
    921 def WriteLOOP : SchedWriteRes<[]> {
    922   let NumMicroOps = 7;
    923 }
    924 def : InstRW<[WriteLOOP], (instregex "LOOP")>;
    925 
    926 // LOOP(N)E. NOTE(review): "LOOP" above is a prefix of these; confirm overlap.
    927 def WriteLOOPE : SchedWriteRes<[]> {
    928   let NumMicroOps = 11;
    929 }
    930 def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>;
    931 
    932 // CALL.
    933 // r: 3 micro-ops using HWPort237, HWPort4 and HWPort6.
    934 def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
    935   let NumMicroOps = 3;
    936 }
    937 def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>;
    938 
    939 // m: same resources as the r form, with two cycles on HWPort237.
    940 def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
    941   let NumMicroOps = 4;
    942   let ResourceCycles = [2, 1, 1];
    943 }
    944 def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>;
    945 
    946 // RET (near and far forms, per the RET*/LRET* patterns).
    947 def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> {
    948   let NumMicroOps = 2;
    949 }
    950 def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>;
    951 
    952 // i: the RETI*/LRETI* opcodes.
    953 def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
    954   let NumMicroOps = 4;
    955   let ResourceCycles = [1, 2, 1]; // HWPort23 x1, HWPort6 x2, HWPort015 x1.
    956 }
    957 def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;
    958 
    959 // BOUND: no port resources are listed, only the micro-op count.
    960 // r,m.
    961 def WriteBOUND : SchedWriteRes<[]> {
    962   let NumMicroOps = 15;
    963 }
    964 def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>;
    965 
    966 // INTO: no port resources are listed, only the micro-op count.
    967 def WriteINTO : SchedWriteRes<[]> {
    968   let NumMicroOps = 4;
    969 }
    970 def : InstRW<[WriteINTO], (instregex "INTO")>;
    971 
    972 //-- String instructions --//
    973 
    974 // LODSB/W.
    975 def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>;
    976 
    977 // LODSD/Q (the opcode names matched here are LODSL and LODSQ).
    978 def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>;
    979 
    980 // STOS: 3 micro-ops using HWPort23, HWPort0156 and HWPort4.
    981 def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> {
    982   let NumMicroOps = 3;
    983 }
    984 def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>;
    985 
    986 // MOVS.
    987 def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
    988   let Latency = 4;
    989   let NumMicroOps = 5;
    990   let ResourceCycles = [2, 1, 2]; // HWPort23 x2, HWPort4 x1, HWPort0156 x2.
    991 }
    992 def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>;
    993 
    994 // SCAS (same write type as LODSB/W).
    995 def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>;
    996 
    997 // CMPS.
    998 def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
    999   let Latency = 4;
   1000   let NumMicroOps = 5;
   1001   let ResourceCycles = [2, 3]; // HWPort23 x2, HWPort0156 x3.
   1002 }
   1003 def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>;
   1004 
   1005 //-- Synchronization instructions (micro-op counts only, no ports) --//
   1006 
   1007 // XADD, register/memory ("rm") opcodes.
   1008 def WriteXADD : SchedWriteRes<[]> {
   1009   let NumMicroOps = 5;
   1010 }
   1011 def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>;
   1012 
   1013 // CMPXCHG, register/memory ("rm") opcodes.
   1014 def WriteCMPXCHG : SchedWriteRes<[]> {
   1015   let NumMicroOps = 6;
   1016 }
   1017 def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
   1018 
   1019 // CMPXCHG8B.
   1020 def WriteCMPXCHG8B : SchedWriteRes<[]> {
   1021   let NumMicroOps = 15;
   1022 }
   1023 def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>;
   1024 
   1025 // CMPXCHG16B.
   1026 def WriteCMPXCHG16B : SchedWriteRes<[]> {
   1027   let NumMicroOps = 22;
   1028 }
   1029 def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>;
   1030 
   1031 //-- Other --//
   1032 
   1033 // PAUSE.
   1034 def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> {
   1035   let NumMicroOps = 5;
   1036   let ResourceCycles = [1, 3]; // HWPort05 x1, HWPort6 x3.
   1037 }
   1038 def : InstRW<[WritePAUSE], (instregex "PAUSE")>;
   1039 
   1040 // LEAVE (same write type as LODSB/W and SCAS).
   1041 def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>;
   1042 
   1043 // XGETBV: no port resources are listed, only the micro-op count.
   1044 def WriteXGETBV : SchedWriteRes<[]> {
   1045   let NumMicroOps = 8;
   1046 }
   1047 def : InstRW<[WriteXGETBV], (instregex "XGETBV")>;
   1048 
   1049 // RDTSC: no port resources are listed, only the micro-op count.
   1050 def WriteRDTSC : SchedWriteRes<[]> {
   1051   let NumMicroOps = 15;
   1052 }
   1053 def : InstRW<[WriteRDTSC], (instregex "RDTSC")>;
   1054 
   1055 // RDPMC: no port resources are listed, only the micro-op count.
   1056 def WriteRDPMC : SchedWriteRes<[]> {
   1057   let NumMicroOps = 34;
   1058 }
   1059 def : InstRW<[WriteRDPMC], (instregex "RDPMC")>;
   1060 
   1061 // RDRAND, register destination.
   1062 def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
   1063   let NumMicroOps = 17;
   1064   let ResourceCycles = [1, 16]; // HWPort23 x1, HWPort015 x16.
   1065 }
   1066 def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>;
   1067 
   1068 //=== Floating Point x87 Instructions ===//
   1069 //-- Move instructions --//
   1070 
   1071 // FLD.
   1072 // r (the pattern below names the register form LD_Frr).
   1073 def : InstRW<[WriteP01], (instregex "LD_Frr")>;
   1074 // m80.
   1075 def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> {
   1076   let Latency = 4;
   1077   let NumMicroOps = 4;
   1078   let ResourceCycles = [2, 2]; // HWPort01 x2, HWPort23 x2.
   1079 }
   1080 def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>;
   1081 
   1082 // FBLD: no port resources are listed, only latency and micro-op count.
   1083 // m80.
   1084 def WriteFBLD : SchedWriteRes<[]> {
   1085   let Latency = 47;
   1086   let NumMicroOps = 43;
   1087 }
   1088 def : InstRW<[WriteFBLD], (instregex "FBLDm")>;
   1089 
   1090 // FST(P).
   1091 // r.
   1092 def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>;
   1093 
   1094 // m80.
   1095 def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> {
   1096   let NumMicroOps = 7;
   1097   let ResourceCycles = [3, 2, 2]; // HWPort0156 x3, HWPort23 x2, HWPort4 x2.
   1098 }
   1099 def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>;
   1100 
   1101 // FBSTP: no port resources are listed, only the micro-op count.
   1102 // m80.
   1103 def WriteFBSTP : SchedWriteRes<[]> {
   1104   let NumMicroOps = 226;
   1105 }
   1106 def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>;
   1107 
   1108 // FXCHG (modeled with WriteNop).
   1109 def : InstRW<[WriteNop], (instregex "XCH_F")>;
   1110 
   1111 // FILD.
   1112 def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> {
   1113   let Latency = 6;
   1114   let NumMicroOps = 2;
   1115 }
   1116 def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>;
   1117 
   1118 // FIST(P) FISTTP. NOTE(review): pattern names only IST_F/IST_FP 16/32; confirm.
   1119 def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> {
   1120   let Latency = 7;
   1121   let NumMicroOps = 3;
   1122 }
   1123 def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>;
   1124 
   1125 // FLDZ.
   1126 def : InstRW<[WriteP01], (instregex "LD_F0")>;
   1127 
   1128 // FLD1.
   1129 def : InstRW<[Write2P01], (instregex "LD_F1")>;
   1130 
   1131 // FLDPI FLDL2T FLDL2E FLDLG2 FLDLN2 (three separate patterns).
   1132 def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)", "FLDL(G|N)2")>;
   1133 
   1134 // FCMOVcc.
   1135 def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> {
   1136   let Latency = 2;
   1137   let NumMicroOps = 3;
   1138   let ResourceCycles = [2, 1]; // HWPort0 x2, HWPort5 x1.
   1139 }
   1140 def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>;
   1141 
   1142 // FNSTSW.
   1143 // AX: 2 micro-ops using HWPort0 and HWPort0156.
   1144 def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> {
   1145   let NumMicroOps = 2;
   1146 }
   1147 def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>;
   1148 
   1149 // m16: 3 micro-ops using HWPort0, HWPort4 and HWPort237.
   1150 def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> {
   1151   let Latency = 6;
   1152   let NumMicroOps = 3;
   1153 }
   1154 def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>;
   1155 
   1156 // FLDCW: 3 micro-ops using HWPort01, HWPort23 and HWPort6.
   1157 def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> {
   1158   let Latency = 7;
   1159   let NumMicroOps = 3;
   1160 }
   1161 def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>;
   1162 
   1163 // FNSTCW: 3 micro-ops using HWPort237, HWPort4 and HWPort6.
   1164 def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
   1165   let NumMicroOps = 3;
   1166 }
   1167 def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>;
   1168 
   1169 // FINCSTP FDECSTP.
   1170 def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>;
   1171 
   1172 // FFREE.
   1173 def : InstRW<[WriteP01], (instregex "FFREE")>;
   1174 
   1175 // FNSAVE (opcode name FSAVEm): micro-op count only, no ports listed.
   1176 def WriteFNSAVE : SchedWriteRes<[]> {
   1177   let NumMicroOps = 147;
   1178 }
   1179 def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>;
   1180 
   1181 // FRSTOR: micro-op count only, no ports listed.
   1182 def WriteFRSTOR : SchedWriteRes<[]> {
   1183   let NumMicroOps = 90;
   1184 }
   1185 def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>;
   1186 
   1187 //-- Arithmetic instructions --//
   1188 
   1189 // FABS.
   1190 def : InstRW<[WriteP0], (instregex "ABS_F")>;
   1191 
   1192 // FCHS.
   1193 def : InstRW<[WriteP0], (instregex "CHS_F")>;
   1194 
   1195 // FCOM(P) FUCOM(P).
   1196 // r.
   1197 def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr",
   1198                          "UCOM_FPr")>;
   1199 // m.
   1200 def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;
   1201 
   1202 // FCOMPP FUCOMPP.
   1203 // r.
   1204 def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>;
   1205 
   1206 // FCOMI(P) FUCOMI(P).
   1207 // r (all four patterns below name register forms).
   1208 def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr",
   1209                            "UCOM_FIPr")>;
   1210 
   1211 // FICOM(P), 16/32-bit memory operands.
   1212 def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;
   1213 
   1214 // FTST.
   1215 def : InstRW<[WriteP1], (instregex "TST_F")>;
   1216 
   1217 // FXAM.
   1218 def : InstRW<[Write2P1], (instregex "FXAM")>;
   1219 
   1220 // FPREM. NOTE(review): "FPREM" is also a prefix of FPREM1; confirm overlap.
   1221 def WriteFPREM : SchedWriteRes<[]> {
   1222   let Latency = 19;
   1223   let NumMicroOps = 28;
   1224 }
   1225 def : InstRW<[WriteFPREM], (instregex "FPREM")>;
   1226 
   1227 // FPREM1: latency and micro-op count only, no ports listed.
   1228 def WriteFPREM1 : SchedWriteRes<[]> {
   1229   let Latency = 27;
   1230   let NumMicroOps = 41;
   1231 }
   1232 def : InstRW<[WriteFPREM1], (instregex "FPREM1")>;
   1233 
   1234 // FRNDINT: latency and micro-op count only, no ports listed.
   1235 def WriteFRNDINT : SchedWriteRes<[]> {
   1236   let Latency = 11;
   1237   let NumMicroOps = 17;
   1238 }
   1239 def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>;
   1240 
   1241 //-- Math instructions --//
   1242 
   1243 // FSCALE: single values chosen from the ranges noted on each field.
   1244 def WriteFSCALE : SchedWriteRes<[]> {
   1245   let Latency = 75; // 49-125
   1246   let NumMicroOps = 50; // 25-75
   1247 }
   1248 def : InstRW<[WriteFSCALE], (instregex "FSCALE")>;
   1249 
   1250 // FXTRACT: latency and micro-op count only, no ports listed.
   1251 def WriteFXTRACT : SchedWriteRes<[]> {
   1252   let Latency = 15;
   1253   let NumMicroOps = 17;
   1254 }
   1255 def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>;
   1256 
   1257 //-- Other instructions --//
   1258 
   1259 // FNOP.
   1260 def : InstRW<[WriteP01], (instregex "FNOP")>;
   1261 
   1262 // WAIT.
   1263 def : InstRW<[Write2P01], (instregex "WAIT")>;
   1264 
   1265 // FNCLEX.
   1266 def : InstRW<[Write5P0156], (instregex "FNCLEX")>;
   1267 
   1268 // FNINIT: micro-op count only, no ports listed.
   1269 def WriteFNINIT : SchedWriteRes<[]> {
   1270   let NumMicroOps = 26;
   1271 }
   1272 def : InstRW<[WriteFNINIT], (instregex "FNINIT")>;
   1273 
   1274 //=== Integer MMX and XMM Instructions ===//
   1275 //-- Move instructions --//
   1276 
   1277 // MOVD.
   1278 // r32/64 <- (x)mm.
   1279 def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
   1280                          "VMOVPDI2DIrr", "MOVPDI2DIrr")>;
   1281 
   1282 // (x)mm <- r32/64.
   1283 def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
   1284                          "VMOVDI2PDIrr", "MOVDI2PDIrr")>;
   1285 
   1286 // MOVQ.
   1287 // r64 <- (x)mm.
   1288 def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>;
   1289 
   1290 // (x)mm <- r64.
   1291 def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
   1292 
   1293 // (x)mm <- (x)mm.
   1294 def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>;
   1295 
   1296 // (V)MOVDQA/U.
   1297 // x <- x (the Y opcodes in the list cover the 256-bit forms as well).
   1298 def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
   1299                            "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV",
   1300                            "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;
   1301 
   1302 // MOVDQ2Q.
   1303 def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>;
   1304 
   1305 // MOVQ2DQ.
   1306 def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>;
   1307 
   1308 
   1309 // PACKSSWB/DW (the patterns below also name PACKUSWB).
   1310 // mm <- mm.
   1311 def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> {
   1312   let Latency = 2;
   1313   let NumMicroOps = 3;
   1314   let ResourceCycles = [3]; // HWPort5 x3.
   1315 }
   1316 def : InstRW<[WriteMMXPACKSSrr], (instregex "MMX_PACKSSDWirr",
   1317                                   "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
   1318 
   1319 // mm <- m64.
   1320 def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> {
   1321   let Latency = 4;
   1322   let NumMicroOps = 3;
   1323   let ResourceCycles = [1, 3]; // HWPort23 x1, HWPort5 x3.
   1324 }
   1325 def : InstRW<[WriteMMXPACKSSrm], (instregex "MMX_PACKSSDWirm",
   1326                                   "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
   1327 
   1328 // VPMOVSX/ZX BW BD BQ DW DQ. NOTE(review): BD is not in the pattern; confirm.
   1329 // y <- x.
   1330 def WriteVPMOVSX : SchedWriteRes<[HWPort5]> {
   1331   let Latency = 3;
   1332   let NumMicroOps = 1;
   1333 }
   1334 def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
   1335 
   1336 // PBLENDW.
   1337 // x,x,i / v,v,v,i
   1338 def WritePBLENDWr : SchedWriteRes<[HWPort5]>;
   1339 def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>;
   1340 
   1341 // x,m,i / v,v,m,i
   1342 def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> {
   1343   let NumMicroOps = 2;
   1344   let Latency = 4;
   1345   let ResourceCycles = [1, 1]; // HWPort5 x1, HWPort23 x1.
   1346 }
   1347 def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>;
   1348 
   1349 // VPBLENDD (uses HWPort015 where PBLENDW uses HWPort5).
   1350 // v,v,v,i.
   1351 def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>;
   1352 def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>;
   1353 
   1354 // v,v,m,i
   1355 def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> {
   1356   let NumMicroOps = 2;
   1357   let Latency = 4;
   1358   let ResourceCycles = [1, 1]; // HWPort015 x1, HWPort23 x1.
   1359 }
   1360 def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>;
   1361 
   1362 // MASKMOVQ.
   1363 def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> {
   1364   let Latency = 13;
   1365   let NumMicroOps = 4;
   1366   let ResourceCycles = [1, 1, 2]; // HWPort0 x1, HWPort4 x1, HWPort23 x2.
   1367 }
   1368 def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>;
   1369 
   1370 // MASKMOVDQU.
   1371 def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> {
   1372   let Latency = 14;
   1373   let NumMicroOps = 10;
   1374   let ResourceCycles = [4, 2, 4]; // HWPort04 x4, HWPort56 x2, HWPort23 x4.
   1375 }
   1376 def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>;
   1377 
   1378 // VPMASKMOV D/Q.
   1379 // v,v,m.
   1380 def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> {
   1381   let Latency = 4;
   1382   let NumMicroOps = 3;
   1383   let ResourceCycles = [2, 1]; // HWPort5 x2, HWPort23 x1.
   1384 }
   1385 def : InstRW<[WriteVPMASKMOVr, ReadAfterLd],
   1386                                (instregex "VPMASKMOV(D|Q)(Y?)rm")>;
   1387 
   1388 // m, v,v.
   1389 def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
   1390   let Latency = 13;
   1391   let NumMicroOps = 4;
   1392   let ResourceCycles = [1, 1, 1, 1]; // One cycle on each listed resource.
   1393 }
   1394 def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
   1395 
   1396 // PMOVMSKB (SSE, AVX and MMX opcode names).
   1397 def WritePMOVMSKB : SchedWriteRes<[HWPort0]> {
   1398   let Latency = 3;
   1399 }
   1400 def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>;
   1401 
   1402 // PEXTR B/W/D/Q.
   1403 // r32,x,i.
   1404 def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> {
   1405   let Latency = 2;
   1406   let NumMicroOps = 2;
   1407   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort5 x1.
   1408 }
   1409 def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>;
   1410 
   1411 // m8,x,i.
   1412 def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> {
   1413   let NumMicroOps = 3;
   1414   let ResourceCycles = [1, 1, 1]; // One cycle on each listed resource.
   1415 }
   1416 def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;
   1417 
   1418 // VPBROADCAST B/W.
   1419 // x, m8/16.
   1420 def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
   1421   let Latency = 5;
   1422   let NumMicroOps = 3;
   1423   let ResourceCycles = [1, 1, 1]; // One cycle on each listed resource.
   1424 }
   1425 def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd],
   1426                                      (instregex "VPBROADCAST(B|W)rm")>;
   1427 
   1428 // y, m8/16: same resources as the x form, with Latency 7 instead of 5.
   1429 def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
   1430   let Latency = 7;
   1431   let NumMicroOps = 3;
   1432   let ResourceCycles = [1, 1, 1]; // One cycle on each listed resource.
   1433 }
   1434 def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd],
   1435                                      (instregex "VPBROADCAST(B|W)Yrm")>;
   1436 
   1437 // VPGATHERDD (gathers below list micro-op counts only, no ports).
   1438 // x (128-bit form).
   1439 def WriteVPGATHERDD128 : SchedWriteRes<[]> {
   1440   let NumMicroOps = 20;
   1441 }
   1442 def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>;
   1443 
   1444 // y (256-bit form).
   1445 def WriteVPGATHERDD256 : SchedWriteRes<[]> {
   1446   let NumMicroOps = 34;
   1447 }
   1448 def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>;
   1449 
   1450 // VPGATHERQD.
   1451 // x (128-bit form).
   1452 def WriteVPGATHERQD128 : SchedWriteRes<[]> {
   1453   let NumMicroOps = 15;
   1454 }
   1455 def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>;
   1456 
   1457 // y (256-bit form).
   1458 def WriteVPGATHERQD256 : SchedWriteRes<[]> {
   1459   let NumMicroOps = 22;
   1460 }
   1461 def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>;
   1462 
   1463 // VPGATHERDQ.
   1464 // x (128-bit form).
   1465 def WriteVPGATHERDQ128 : SchedWriteRes<[]> {
   1466   let NumMicroOps = 12;
   1467 }
   1468 def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>;
   1469 
   1470 // y (256-bit form).
   1471 def WriteVPGATHERDQ256 : SchedWriteRes<[]> {
   1472   let NumMicroOps = 20;
   1473 }
   1474 def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>;
   1475 
   1476 // VPGATHERQQ.
   1477 // x (128-bit form).
   1478 def WriteVPGATHERQQ128 : SchedWriteRes<[]> {
   1479   let NumMicroOps = 14;
   1480 }
   1481 def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>;
   1482 
   1483 // y (256-bit form).
   1484 def WriteVPGATHERQQ256 : SchedWriteRes<[]> {
   1485   let NumMicroOps = 22;
   1486 }
   1487 def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;
   1488 
   1489 //-- Arithmetic instructions --//
   1490 
   1491 // PHADD|PHSUB (S) W/D.
   1492 // v <- v,v.
   1493 def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
   1494   let Latency = 3;
   1495   let NumMicroOps = 3;
   1496   let ResourceCycles = [1, 2]; // HWPort1 x1, HWPort5 x2.
   1497 }
   1498 def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64",
   1499                                "MMX_PHADDSWrr64",
   1500                                "MMX_PHSUB(W|D)rr64",
   1501                                "MMX_PHSUBSWrr64",
   1502                                "(V?)PH(ADD|SUB)(W|D)(Y?)rr",
   1503                                "(V?)PH(ADD|SUB)SWrr(256)?")>;
   1504 
   1505 // v <- v,m. NOTE(review): NumMicroOps is 3 but ResourceCycles sum to 4.
   1506 def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
   1507   let Latency = 6;
   1508   let NumMicroOps = 3;
   1509   let ResourceCycles = [1, 2, 1]; // HWPort1 x1, HWPort5 x2, HWPort23 x1.
   1510 }
   1511 def : InstRW<[WritePHADDSUBm, ReadAfterLd],
   1512                               (instregex "MMX_PHADD(W?)rm64",
   1513                                "MMX_PHADDSWrm64",
   1514                                "MMX_PHSUB(W|D)rm64",
   1515                                "MMX_PHSUBSWrm64",
   1516                                "(V?)PH(ADD|SUB)(W|D)(Y?)rm",
   1517                                "(V?)PH(ADD|SUB)SWrm(128|256)?")>;
   1518 
   1519 // PCMPGTQ.
   1520 // v <- v,v.
   1521 def WritePCMPGTQr : SchedWriteRes<[HWPort0]> {
   1522   let Latency = 5;
   1523   let NumMicroOps = 1;
   1524 }
   1525 def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
   1526 
   1527 // v <- v,m. NOTE(review): same Latency as the register form; confirm.
   1528 def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> {
   1529   let Latency = 5;
   1530   let NumMicroOps = 2;
   1531   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort23 x1.
   1532 }
   1533 def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>;
   1534 
   1535 // PMULLD.
   1536 // x,x / y,y,y.
   1537 def WritePMULLDr : SchedWriteRes<[HWPort0]> {
   1538   let Latency = 10;
   1539   let NumMicroOps = 2;
   1540   let ResourceCycles = [2]; // HWPort0 x2.
   1541 }
   1542 def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>;
   1543 
   1544 // x,m / y,y,m. NOTE(review): same Latency as the register form; confirm.
   1545 def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> {
   1546   let Latency = 10;
   1547   let NumMicroOps = 3;
   1548   let ResourceCycles = [2, 1]; // HWPort0 x2, HWPort23 x1.
   1549 }
   1550 def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>;
   1551 
   1552 //-- Logic instructions --//
   1553 
   1554 // PTEST.
   1555 // v,v: one micro-op each on HWPort0 and HWPort5.
   1556 def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> {
   1557   let Latency = 2;
   1558   let NumMicroOps = 2;
   1559   let ResourceCycles = [1, 1];
   1560 }
   1561 def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>;
   1562 
   1563 // v,m: the register form plus a load on HWPort23; bound to the rm opcodes.
   1564 def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
   1565   let Latency = 6;
   1566   let NumMicroOps = 3;
   1567   let ResourceCycles = [1, 1, 1];
   1568 }
   1569 def : InstRW<[WritePTESTm], (instregex "(V?)PTEST(Y?)rm")>;
   1570 
   1571 // PSLL,PSRL,PSRA W/D/Q.
   1572 // x,x / v,v,x.
   1573 def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> {
   1574   let Latency = 2;
   1575   let NumMicroOps = 2;
   1576   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort5 x1.
   1577 }
   1578 def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>;
   1579 
   1580 // PSLL,PSRL DQ (immediate "ri" opcodes).
   1581 def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>;
   1582 
   1583 //-- Other --//
   1584 
   1585 // EMMS: latency and micro-op count only, no ports listed.
   1586 def WriteEMMS : SchedWriteRes<[]> {
   1587   let Latency = 13;
   1588   let NumMicroOps = 31;
   1589 }
   1590 def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>;
   1591 
   1592 //=== Floating Point XMM and YMM Instructions ===//
   1593 //-- Move instructions --//
   1594 
   1595 // MOVMSKP S/D.
   1596 // r32 <- x.
   1597 def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> {
   1598   let Latency = 3;
   1599 }
   1600 def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>;
   1601 
   1602 // r32 <- y (Latency 2 here versus 3 for the x form above).
   1603 def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> {
   1604   let Latency = 2;
   1605 }
   1606 def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>;
   1607 
   1608 // VPERM2F128.
   1609 def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>;
   1610 def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>;
   1611 
   1612 // BLENDVP S/D (the rr0/rm0 opcodes).
   1613 def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>;
   1614 def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>;
   1615 
   1616 // VBROADCASTF128 (modeled with WriteLoad).
   1617 def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>;
   1618 
   1619 // EXTRACTPS.
   1620 // r32,x,i.
   1621 def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> {
   1622   let NumMicroOps = 2;
   1623   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort5 x1.
   1624 }
   1625 def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
   1626 
   1627 // m32,x,i.
   1628 def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
   1629   let Latency = 4;
   1630   let NumMicroOps = 3;
   1631   let ResourceCycles = [1, 1, 1]; // One cycle on each listed resource.
   1632 }
   1633 def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
   1634 
   1635 // VEXTRACTF128.
   1636 // x,y,i.
   1637 def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>;
   1638 
   1639 // m128,y,i.
   1640 def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> {
   1641   let Latency = 4;
   1642   let NumMicroOps = 2;
   1643   let ResourceCycles = [1, 1]; // HWPort23 x1, HWPort4 x1.
   1644 }
   1645 def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>;
   1646 
   1647 // VINSERTF128.
   1648 // y,y,x,i.
   1649 def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>;
   1650 
   1651 // y,y,m128,i: one cycle on HWPort015 plus a load on HWPort23.
   1652 def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> {
   1653   let Latency = 4;
   1654   let NumMicroOps = 2;
   1655   let ResourceCycles = [1, 1];
   1656 }
   1657 def : InstRW<[WriteVINSERTF128m, ReadAfterLd], (instregex "VINSERTF128rm")>;
   1658 
   1659 // VMASKMOVP S/D.
   1660 // v,v,m.
   1661 def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> {
   1662   let Latency = 4;
   1663   let NumMicroOps = 3;
   1664   let ResourceCycles = [2, 1]; // HWPort5 x2, HWPort23 x1.
   1665 }
   1666 def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>;
   1667 
   1668 // m128,x,x.
   1669 def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
   1670   let Latency = 13;
   1671   let NumMicroOps = 4;
   1672   let ResourceCycles = [1, 1, 1, 1]; // One cycle on each listed resource.
   1673 }
   1674 def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>;
   1675 
   1676 // m256,y,y: same resources as the m128 form, with Latency 14 instead of 13.
   1677 def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
   1678   let Latency = 14;
   1679   let NumMicroOps = 4;
   1680   let ResourceCycles = [1, 1, 1, 1]; // One cycle on each listed resource.
   1681 }
   1682 def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
   1683 
   1684 // VGATHERDPS (gathers below list micro-op counts only, no ports).
   1685 // x (128-bit form).
   1686 def WriteVGATHERDPS128 : SchedWriteRes<[]> {
   1687   let NumMicroOps = 20;
   1688 }
   1689 def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>;
   1690 
   1691 // y (256-bit form).
   1692 def WriteVGATHERDPS256 : SchedWriteRes<[]> {
   1693   let NumMicroOps = 34;
   1694 }
   1695 def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>;
   1696 
   1697 // VGATHERQPS.
   1698 // x (128-bit form).
   1699 def WriteVGATHERQPS128 : SchedWriteRes<[]> {
   1700   let NumMicroOps = 15;
   1701 }
   1702 def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>;
   1703 
   1704 // y (256-bit form).
   1705 def WriteVGATHERQPS256 : SchedWriteRes<[]> {
   1706   let NumMicroOps = 22;
   1707 }
   1708 def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>;
   1709 
   1710 // VGATHERDPD.
   1711 // x (128-bit form).
   1712 def WriteVGATHERDPD128 : SchedWriteRes<[]> {
   1713   let NumMicroOps = 12;
   1714 }
   1715 def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>;
   1716 
   1717 // y (256-bit form).
   1718 def WriteVGATHERDPD256 : SchedWriteRes<[]> {
   1719   let NumMicroOps = 20;
   1720 }
   1721 def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>;
   1722 
   1723 // VGATHERQPD.
   1724 // x (128-bit form).
   1725 def WriteVGATHERQPD128 : SchedWriteRes<[]> {
   1726   let NumMicroOps = 14;
   1727 }
   1728 def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>;
   1729 
   1730 // y (256-bit form).
   1731 def WriteVGATHERQPD256 : SchedWriteRes<[]> {
   1732   let NumMicroOps = 22;
   1733 }
   1734 def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>;
   1735 
   1736 //-- Conversion instructions --//
   1737 
   1738 // CVTPD2PS.
   1739 // x,x.
   1740 def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>;
   1741 
   1742 // x,m128.
   1743 def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>;
   1744 
   1745 // x,y.
   1746 def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> {
   1747   let Latency = 5;
   1748   let NumMicroOps = 2;
   1749   let ResourceCycles = [1, 1]; // HWPort1 x1, HWPort5 x1.
   1750 }
   1751 def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>;
   1752 
   1753 // x,m256.
   1754 def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
   1755   let Latency = 9;
   1756   let NumMicroOps = 3;
   1757   let ResourceCycles = [1, 1, 1]; // One cycle on each listed resource.
   1758 }
   1759 def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>;
   1760 
   1761 // CVTSD2SS (including the Int_ opcode names).
   1762 // x,x.
   1763 def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>;
   1764 
   1765 // x,m64.
   1766 def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>;
   1767 
   1768 // CVTPS2PD.
   1769 // x,x.
   1770 def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> {
   1771   let Latency = 2;
   1772   let NumMicroOps = 2;
   1773   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort5 x1.
   1774 }
   1775 def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>;
   1776 
   1777 // x,m64.
   1778 // y,m128.
   1779 def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> {
   1780   let Latency = 5;
   1781   let NumMicroOps = 2;
   1782   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort23 x1.
   1783 }
   1784 def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>;
   1785 
   1786 // y,x: same resources as the x,x form, with Latency 5 instead of 2.
   1787 def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> {
   1788   let Latency = 5;
   1789   let NumMicroOps = 2;
   1790   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort5 x1.
   1791 }
   1792 def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>;
   1793 
   1794 // CVTSS2SD (including the Int_ opcode names).
   1795 // x,x.
   1796 def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> {
   1797   let Latency = 2;
   1798   let NumMicroOps = 2;
   1799   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort5 x1.
   1800 }
   1801 def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;
   1802 
   1803 // x,m32.
   1804 def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> {
   1805   let Latency = 5;
   1806   let NumMicroOps = 2;
   1807   let ResourceCycles = [1, 1]; // HWPort0 x1, HWPort23 x1.
   1808 }
   1809 def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>;
   1810 
   1811 // CVTDQ2PD.
   1812 // x,x.
   1813 def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>;
   1814 
   1815 // y,x.
   1816 def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>;
   1817 
   1818 // CVT(T)PD2DQ.
   1819 // x,x.
   1820 def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>;
   1821 // x,m128.
   1822 def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>;
   1823 // x,y.
   1824 def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>;
   1825 // x,m256.
   1826 def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>;
   1827 
   1828 // CVT(T)PS2PI.
   1829 // mm,x.
   1830 def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>;
   1831 
   1832 // CVTPI2PD.
   1833 // x,mm.
   1834 def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>;
   1835 
   1836 // CVT(T)PD2PI.
   1837 // mm,x.
   1838 def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>;
   1839 
   1840 // CVTSI2SS.
   1841 // x,r32/64 (the pattern accepts the optional "64" opcode suffix).
   1842 def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;
   1843 
   1844 // CVT(T)SS2SI.
   1845 // r32/64,x.
   1846 def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
   1847 // r32/64,m32.
   1848 def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;
   1849 
   1850 // CVTSI2SD (the pattern names the SD opcodes; SI2SS has its own entry).
   1851 // x,r32/64.
   1852 def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>;
   1853 
   1854 // CVTSD2SI.
   1855 // r32/64
   1856 def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>;
   1857 // r32,m32.
   1858 def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>;
   1859 
   1860 // VCVTPS2PH: (Y?) matches both the non-Y and the Y (256-bit) opcodes.
   1861 // x,v,i.
   1862 def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>;
   1863 // m,v,i: memory-destination (mr) form; WriteRMW is listed as a second write.
   1864 def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>;
   1865 
   1866 // VCVTPH2PS: only the register (rr) forms are overridden here.
   1867 // v,x.
   1868 def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>;
   1869 
   1870 //-- Arithmetic instructions --//
   1871 
   1872 // HADD, HSUB PS/PD: SSE and AVX opcodes, including the Y forms.
   1873 // x,x / v,v,v.
   1874 def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> {
   1875   let Latency = 5;
   1876   let NumMicroOps = 3;
   1877   let ResourceCycles = [1, 2]; // 1 cycle on HWPort1, 2 on HWPort5.
   1878 }
   1879 def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>;
   1880 
   1881 // x,m / v,v,m.
   1882 def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
   1883   let Latency = 9; // Register-form latency (5) plus the 4-cycle load latency.
   1884   let NumMicroOps = 4;
   1885   let ResourceCycles = [1, 2, 1]; // As the register form, plus 1 cycle on HWPort23.
   1886 }
   1887 def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>;
   1888 
   1889 // MUL SS/SD PS/PD.
   1890 // x,x / v,v,v.
   1891 def WriteMULr : SchedWriteRes<[HWPort01]> { // NumMicroOps and ResourceCycles are left at their defaults.
   1892   let Latency = 5;
   1893 }
   1894 def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;
   1895 
   1896 // x,m / v,v,m.
   1897 def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
   1898   let Latency = 9; // Register-form latency (5) plus the 4-cycle load latency.
   1899   let NumMicroOps = 2;
   1900   let ResourceCycles = [1, 1]; // 1 cycle on HWPort01, 1 on HWPort23.
   1901 }
   1902 def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>;
   1903 
   1904 // VDIVPS (256-bit Y opcodes only).
   1905 // y,y,y.
   1906 def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> {
   1907   let Latency = 19; // 18-21 cycles.
   1908   let NumMicroOps = 3;
   1909   let ResourceCycles = [2, 1]; // 2 cycles on HWPort0, 1 on HWPort15.
   1910 }
   1911 def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>;
   1912 
   1913 // y,y,m256: load form; the InstRW below also lists ReadAfterLd.
   1914 def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
   1915   let Latency = 23; // 18-21 + 4 cycles.
   1916   let NumMicroOps = 4;
   1917   let ResourceCycles = [2, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   1918 }
   1919 def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>;
   1920 
   1921 // VDIVPD (256-bit Y opcodes only).
   1922 // y,y,y.
   1923 def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> {
   1924   let Latency = 27; // 19-35 cycles.
   1925   let NumMicroOps = 3;
   1926   let ResourceCycles = [2, 1]; // 2 cycles on HWPort0, 1 on HWPort15.
   1927 }
   1928 def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>;
   1929 
   1930 // y,y,m256: load form; the InstRW below also lists ReadAfterLd.
   1931 def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
   1932   let Latency = 31; // 19-35 + 4 cycles.
   1933   let NumMicroOps = 4;
   1934   let ResourceCycles = [2, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   1935 }
   1936 def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>;
   1937 
   1938 // VRCPPS (256-bit Y opcodes only; the patterns also accept an _Int suffix).
   1939 // y,y.
   1940 def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> {
   1941   let Latency = 7;
   1942   let NumMicroOps = 3;
   1943   let ResourceCycles = [2, 1]; // 2 cycles on HWPort0, 1 on HWPort15.
   1944 }
   1945 def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;
   1946 
   1947 // y,m256.
   1948 def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
   1949   let Latency = 11; // Register-form latency (7) plus the 4-cycle load latency.
   1950   let NumMicroOps = 4;
   1951   let ResourceCycles = [2, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   1952 }
   1953 def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>;
   1954 
   1955 // ROUND SS/SD PS/PD: SSE and AVX opcodes, including the Y and _Int forms.
   1956 // v,v,i.
   1957 def WriteROUNDr : SchedWriteRes<[HWPort1]> {
   1958   let Latency = 6;
   1959   let NumMicroOps = 2;
   1960   let ResourceCycles = [2]; // Both micro-ops are charged to HWPort1.
   1961 }
   1962 def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;
   1963 
   1964 // v,m,i.
   1965 def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> {
   1966   let Latency = 10; // Register-form latency (6) plus the 4-cycle load latency.
   1967   let NumMicroOps = 3;
   1968   let ResourceCycles = [2, 1]; // 2 cycles on HWPort1, 1 on HWPort23.
   1969 }
   1970 def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;
   1971 
   1972 // DPPS: SSE and AVX opcodes, including the Y forms.
   1973 // x,x,i / v,v,v,i.
   1974 def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
   1975   let Latency = 14;
   1976   let NumMicroOps = 4;
   1977   let ResourceCycles = [2, 1, 1]; // 2 cycles on HWPort0, 1 each on HWPort1 and HWPort5.
   1978 }
   1979 def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>;
   1980 
   1981 // x,m,i / v,v,m,i: load form; the InstRW below also lists ReadAfterLd.
   1982 def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> {
   1983   let Latency = 18; // Register-form latency (14) plus the 4-cycle load latency.
   1984   let NumMicroOps = 6;
   1985   let ResourceCycles = [2, 1, 1, 1, 1]; // As the register form, plus 1 cycle each on HWPort23 and HWPort6.
   1986 }
   1987 def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>;
   1988 
   1989 // DPPD: the patterns have no Y alternative, so only the non-Y opcodes are covered.
   1990 // x,x,i.
   1991 def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
   1992   let Latency = 9;
   1993   let NumMicroOps = 3;
   1994   let ResourceCycles = [1, 1, 1]; // 1 cycle on each of HWPort0, HWPort1 and HWPort5.
   1995 }
   1996 def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>;
   1997 
   1998 // x,m,i.
   1999 def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> {
   2000   let Latency = 13; // Register-form latency (9) plus the 4-cycle load latency.
   2001   let NumMicroOps = 4;
   2002   let ResourceCycles = [1, 1, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   2003 }
   2004 def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>;
   2005 
   2006 // VFMADD: register forms of the FMA opcode families matched below.
   2007 // v,v,v.
   2008 def WriteFMADDr : SchedWriteRes<[HWPort01]> { // ResourceCycles is left at its default.
   2009   let Latency = 5;
   2010   let NumMicroOps = 1;
   2011 }
   2012 def : InstRW<[WriteFMADDr],
   2013     (instregex
   2014     // 3p forms (packed, three-operand; optional trailing Y).
   2015     "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
   2016     // 3s forms (scalar, three-operand).
   2017     "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
   2018     // 4s/4s_int forms (scalar, four-operand; optional _REV or _Int suffix).
   2019     "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
   2020     // 4p forms (packed, four-operand; optional Y and _REV suffixes).
   2021     "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;
   2022 
   2023 // v,v,m: memory-operand forms of the same FMA opcode families.
   2024 def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> {
   2025   let Latency = 9; // Register-form latency (5) plus the 4-cycle load latency.
   2026   let NumMicroOps = 2;
   2027   let ResourceCycles = [1, 1]; // 1 cycle on HWPort01, 1 on HWPort23.
   2028 }
   2029 def : InstRW<[WriteFMADDm],
   2030     (instregex
   2031     // 3p forms (packed, three-operand; optional trailing Y).
   2032     "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
   2033     // 3s forms (scalar, three-operand).
   2034     "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
   2035     // 4s/4s_int forms (scalar, four-operand; rm or mr operand order, optional _Int).
   2036     "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
   2037     // 4p forms (packed, four-operand; rm or mr operand order, optional Y).
   2038     "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;
   2039 
   2040 //-- Math instructions --//
   2041 
   2042 // VSQRTPS (256-bit Y opcodes only).
   2043 // y,y.
   2044 def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
   2045   let Latency = 19;
   2046   let NumMicroOps = 3;
   2047   let ResourceCycles = [2, 1]; // 2 cycles on HWPort0, 1 on HWPort15.
   2048 }
   2049 def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>;
   2050 
   2051 // y,m256.
   2052 def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
   2053   let Latency = 23; // Register-form latency (19) plus the 4-cycle load latency.
   2054   let NumMicroOps = 4;
   2055   let ResourceCycles = [2, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   2056 }
   2057 def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>;
   2058 
   2059 // VSQRTPD (256-bit Y opcodes only).
   2060 // y,y.
   2061 def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> {
   2062   let Latency = 28;
   2063   let NumMicroOps = 3;
   2064   let ResourceCycles = [2, 1]; // 2 cycles on HWPort0, 1 on HWPort15.
   2065 }
   2066 def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
   2067 
   2068 // y,m256.
   2069 def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
   2070   let Latency = 32; // Register-form latency (28) plus the 4-cycle load latency.
   2071   let NumMicroOps = 4;
   2072   let ResourceCycles = [2, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   2073 }
   2074 def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>;
   2075 
   2076 // RSQRT SS/PS: non-Y opcodes, with optional V prefix and _Int suffix.
   2077 // x,x.
   2078 def WriteRSQRTr : SchedWriteRes<[HWPort0]> { // NumMicroOps and ResourceCycles are left at their defaults.
   2079   let Latency = 5;
   2080 }
   2081 def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>;
   2082 
   2083 // x,m128.
   2084 def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> {
   2085   let Latency = 9; // Register-form latency (5) plus the 4-cycle load latency.
   2086   let NumMicroOps = 2;
   2087   let ResourceCycles = [1, 1]; // 1 cycle on HWPort0, 1 on HWPort23.
   2088 }
   2089 def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>;
   2090 
   2091 // RSQRTPS 256: Y opcodes, which get their own writes instead of WriteRSQRTr/m.
   2092 // y,y.
   2093 def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
   2094   let Latency = 7;
   2095   let NumMicroOps = 3;
   2096   let ResourceCycles = [2, 1]; // 2 cycles on HWPort0, 1 on HWPort15.
   2097 }
   2098 def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;
   2099 
   2100 // y,m256.
   2101 def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
   2102   let Latency = 11; // Register-form latency (7) plus the 4-cycle load latency.
   2103   let NumMicroOps = 4;
   2104   let ResourceCycles = [2, 1, 1]; // As the register form, plus 1 cycle on HWPort23.
   2105 }
   2106 def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>;
   2107 
   2108 //-- Logic instructions --//
   2109 
   2110 // AND, ANDN, OR, XOR PS/PD: SSE and AVX opcodes, including the Y forms.
   2111 // x,x / v,v,v: register forms use WriteP5.
   2112 def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
   2113 // x,m / v,v,m: load forms use WriteP5Ld and also list ReadAfterLd.
   2114 def : InstRW<[WriteP5Ld, ReadAfterLd],
   2115                          (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;
   2116 
   2117 //-- Other instructions --//
   2118 
   2119 // VZEROUPPER: modeled with an empty resource list, so only the micro-op count is set.
   2120 def WriteVZEROUPPER : SchedWriteRes<[]> {
   2121   let NumMicroOps = 4;
   2122 }
   2123 def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>;
   2124 
   2125 // VZEROALL: also an empty resource list, with a larger micro-op count.
   2126 def WriteVZEROALL : SchedWriteRes<[]> {
   2127   let NumMicroOps = 12;
   2128 }
   2129 def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>;
   2130 
   2131 // LDMXCSR: the pattern matches the opcode with or without the V prefix.
   2132 def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> {
   2133   let Latency = 6;
   2134   let NumMicroOps = 3;
   2135   let ResourceCycles = [1, 1, 1]; // 1 cycle on each of HWPort0, HWPort6 and HWPort23.
   2136 }
   2137 def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>;
   2138 
   2139 // STMXCSR: the pattern matches the opcode with or without the V prefix.
   2140 def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> {
   2141   let Latency = 7;
   2142   let NumMicroOps = 4;
   2143   let ResourceCycles = [1, 1, 1, 1]; // 1 cycle on each of HWPort0, HWPort4, HWPort6 and HWPort237.
   2144 }
   2145 def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>;
   2146 
   2147 } // SchedModel
   2148