Home | History | Annotate | Download | only in X86
      1 //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the machine model for Znver1 to support instruction
     11 // scheduling and other instruction cost heuristics.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 def Znver1Model : SchedMachineModel {
     16   // Zen can decode 4 instructions per cycle.
     17   let IssueWidth = 4;
     18   // MicroOpBufferSize is sized after the reorder buffer (cf. ZnRCU<192, 8>).
     19   let MicroOpBufferSize = 192;
     20   let LoadLatency = 4;  // Same 4 cycles as ReadAdvance<ReadAfterLd, 4> in this file.
     21   let MispredictPenalty = 17;
     22   let HighLatency = 25;
     23   let PostRAScheduler = 1;
     24 
     25   // FIXME: Scheduling information has not been provided for every
     26   // instruction yet, so the model is incomplete.
     27   // CompleteModel is therefore reset to 0 to declare that;
     28   // revisit once all instructions are covered.
     29   let CompleteModel = 0;
     30 }
     31 
     32 let SchedModel = Znver1Model in {
     33 
     34 // Zen can issue micro-ops to 10 different units in one cycle.
     35 // These are:
     36 //  * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
     37 //  * Two AGU units (ZnAGU0, ZnAGU1)
     38 //  * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
     39 // The AGUs feed the load/store queues at two loads and one store per cycle.
     40 
     41 // Four ALU units are defined below
     42 def ZnALU0 : ProcResource<1>;
     43 def ZnALU1 : ProcResource<1>;
     44 def ZnALU2 : ProcResource<1>;
     45 def ZnALU3 : ProcResource<1>;
     46 
     47 // Two AGU units are defined below
     48 def ZnAGU0 : ProcResource<1>;
     49 def ZnAGU1 : ProcResource<1>;
     50 
     51 // Four FPU units are defined below
     52 def ZnFPU0 : ProcResource<1>;
     53 def ZnFPU1 : ProcResource<1>;
     54 def ZnFPU2 : ProcResource<1>;
     55 def ZnFPU3 : ProcResource<1>;
     56 
     57 // FPU grouping
     58 def ZnFPU013  : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
     59 def ZnFPU01   : ProcResGroup<[ZnFPU0, ZnFPU1]>;
     60 def ZnFPU12   : ProcResGroup<[ZnFPU1, ZnFPU2]>;
     61 def ZnFPU13   : ProcResGroup<[ZnFPU1, ZnFPU3]>;
     62 def ZnFPU23   : ProcResGroup<[ZnFPU2, ZnFPU3]>;
     63 def ZnFPU02   : ProcResGroup<[ZnFPU0, ZnFPU2]>;
     64 def ZnFPU03   : ProcResGroup<[ZnFPU0, ZnFPU3]>;
     65 
     66 // Below are the groupings of the units.
     67 // Micro-ops that can be issued to any of several units are handled this way.
     68 
     69 // ALU grouping
     70 // ZnALU03 - 0,3 grouping
     71 def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
     72 
     73 // 56 Entry (14x4 entries) Int Scheduler
     74 def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
     75   let BufferSize=56;
     76 }
     77 
     78 // 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
     79 // but are relevant for some instructions
     80 def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
     81   let BufferSize=28;
     82 }
     83 
     84 // Integer Multiplication issued on ALU1.
     85 def ZnMultiplier : ProcResource<1>;
     86 
     87 // Integer division issued on ALU2.
     88 def ZnDivider : ProcResource<1>;
     89 
     90 // A 4-cycle load-to-use latency is captured here.
     91 def : ReadAdvance<ReadAfterLd, 4>;
     92 
     93 // The Integer PRF for Zen is 168 entries, and it holds the architectural and
     94 // speculative version of the 64-bit integer registers.
     95 // Reference: "Software Optimization Guide for AMD Family 17h Processors"
     96 def ZnIntegerPRF : RegisterFile<168, [GR64, CCR]>;
     97 
     98 // 36 Entry (9x4 entries) floating-point Scheduler
     99 def ZnFPU     : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> {
    100 let BufferSize=36;
    101 }
    102 
    103 // The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
    104 // registers. Operations on 256-bit data types are cracked into two COPs.
    105 // Reference: "Software Optimization Guide for AMD Family 17h Processors"
    106 def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
    107 
    108 // The unit can track up to 192 macro ops in-flight.
    109 // The retire unit handles in-order commit of up to 8 macro ops per cycle.
    110 // Reference: "Software Optimization Guide for AMD Family 17h Processors"
    111 // To be noted, the retire unit is shared between integer and FP ops.
    112 // In SMT mode it is 96 entries per thread. However, we do not use the
    113 // conservative value here because there is currently no way to fully model
    114 // SMT mode, so there is no point in trying.
    115 def ZnRCU : RetireControlUnit<192, 8>;
    116 
    117 // FIXME: there are 72 read buffers and 44 write buffers.
    118 
    119 // A folded load is an instruction that loads from memory and also performs
    120 // an operation, e.g. ADDPD xmm,[mem]. Such instructions are usually
    121 // micro-fused, so they only appear as two micro-ops:
    122 //      a. the load, and
    123 //      b. the operation itself (addpd in the example).
    124 // This multiclass defines a register/folded-load pair of WriteRes entries
    125 // for the integer units (LoadLat defaults to 4, LoadUOps to 1).
    126 multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
    127                           list<ProcResourceKind> ExePorts,
    128                           int Lat, list<int> Res = [], int UOps = 1,
    129                           int LoadLat = 4, int LoadUOps = 1> {
    130   // Register variant: Lat cycles of latency and UOps micro-ops on ExePorts.
    131   def : WriteRes<SchedRW, ExePorts> {
    132     let Latency = Lat;
    133     let ResourceCycles = Res;
    134     let NumMicroOps = UOps;
    135   }
    136 
    137   // Memory variant: additionally uses ZnAGU (one cycle when Res is given),
    138   // adds LoadLat to the latency and LoadUOps to the micro-op count.
    139   def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
    140     let Latency = !add(Lat, LoadLat);
    141     let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));  // Stay empty if Res is empty; else prepend ZnAGU's cycle.
    142     let NumMicroOps = !add(UOps, LoadUOps);
    143   }
    144 }
    145 
    146 // Register/folded-load WriteRes pair for the FP units (LoadLat = 7, LoadUOps = 0).
    147 multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
    148                           list<ProcResourceKind> ExePorts,
    149                           int Lat, list<int> Res = [], int UOps = 1,
    150                           int LoadLat = 7, int LoadUOps = 0> {
    151   // Register variant: Lat cycles of latency and UOps micro-ops on ExePorts.
    152   def : WriteRes<SchedRW, ExePorts> {
    153     let Latency = Lat;
    154     let ResourceCycles = Res;
    155     let NumMicroOps = UOps;
    156   }
    157 
    158   // Memory variant: additionally uses ZnAGU (one cycle when Res is given),
    159   // adds LoadLat to the latency and LoadUOps to the micro-op count.
    160   def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
    161     let Latency = !add(Lat, LoadLat);
    162     let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));  // Stay empty if Res is empty; else prepend ZnAGU's cycle.
    163     let NumMicroOps = !add(UOps, LoadUOps);
    164   }
    165 }
    166 
    167 // WriteRMW is set for instructions with Memory write
    168 // operation in codegen
    169 def : WriteRes<WriteRMW, [ZnAGU]>;
    170 
    171 def : WriteRes<WriteStore,   [ZnAGU]>;
    172 def : WriteRes<WriteStoreNT, [ZnAGU]>;
    173 def : WriteRes<WriteMove,    [ZnALU]>;
    174 def : WriteRes<WriteLoad,    [ZnAGU]> { let Latency = 8; }
    175 
    176 def : WriteRes<WriteZero,  []>;
    177 def : WriteRes<WriteLEA, [ZnALU]>;
    178 defm : ZnWriteResPair<WriteALU,   [ZnALU], 1>;
    179 defm : ZnWriteResPair<WriteADC,   [ZnALU], 1>;
    180 defm : ZnWriteResPair<WriteIMul,   [ZnALU1, ZnMultiplier], 4>;
    181 defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
    182 
    183 defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
    184 defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
    185 
    186 defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
    187 
    188 defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
    189 defm : X86WriteResUnsupported<WriteSHDrrcl>;
    190 defm : X86WriteResUnsupported<WriteSHDmri>;
    191 defm : X86WriteResUnsupported<WriteSHDmrcl>;
    192 
    193 defm : ZnWriteResPair<WriteJump,  [ZnALU], 1>;
    194 defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
    195 
    196 defm : ZnWriteResPair<WriteCMOV,   [ZnALU], 1>;
    197 defm : ZnWriteResPair<WriteCMOV2,  [ZnALU], 1>;
    198 def  : WriteRes<WriteSETCC,  [ZnALU]>;
    199 def  : WriteRes<WriteSETCCStore,  [ZnALU, ZnAGU]>;
    200 defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
    201 def  : WriteRes<WriteBitTest,[ZnALU]>;
    202 
    203 // Bit counts.
    204 defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
    205 defm : ZnWriteResPair<WriteBSR, [ZnALU], 3>;
    206 defm : ZnWriteResPair<WriteLZCNT,          [ZnALU], 2>;
    207 defm : ZnWriteResPair<WriteTZCNT,          [ZnALU], 2>;
    208 defm : ZnWriteResPair<WritePOPCNT,         [ZnALU], 1>;
    209 
    210 // Treat misc copies as a move.
    211 def : InstRW<[WriteMove], (instrs COPY)>;
    212 
    213 // BMI1 BEXTR, BMI2 BZHI
    214 defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
    215 defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
    216 
    217 // IDIV
    218 defm : ZnWriteResPair<WriteDiv8,   [ZnALU2, ZnDivider], 15, [1,15], 1>;
    219 defm : ZnWriteResPair<WriteDiv16,  [ZnALU2, ZnDivider], 17, [1,17], 2>;
    220 defm : ZnWriteResPair<WriteDiv32,  [ZnALU2, ZnDivider], 25, [1,25], 2>;
    221 defm : ZnWriteResPair<WriteDiv64,  [ZnALU2, ZnDivider], 41, [1,41], 2>;
    222 defm : ZnWriteResPair<WriteIDiv8,  [ZnALU2, ZnDivider], 15, [1,15], 1>;
    223 defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
    224 defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
    225 defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
    226 
    227 // IMULH
    228 def  : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
    229   let Latency = 4;
    230 }
    231 
    232 // Floating point operations
    233 defm : X86WriteRes<WriteFLoad,         [ZnAGU], 8, [1], 1>;
    234 defm : X86WriteRes<WriteFLoadX,        [ZnAGU], 8, [1], 1>;
    235 defm : X86WriteRes<WriteFLoadY,        [ZnAGU], 8, [1], 1>;
    236 defm : X86WriteRes<WriteFMaskedLoad,   [ZnAGU,ZnFPU01], 8, [1,1], 1>;
    237 defm : X86WriteRes<WriteFMaskedLoadY,  [ZnAGU,ZnFPU01], 8, [1,2], 2>;
    238 defm : X86WriteRes<WriteFStore,        [ZnAGU], 1, [1], 1>;
    239 defm : X86WriteRes<WriteFStoreX,       [ZnAGU], 1, [1], 1>;
    240 defm : X86WriteRes<WriteFStoreY,       [ZnAGU], 1, [1], 1>;
    241 defm : X86WriteRes<WriteFStoreNT,      [ZnAGU,ZnFPU2], 8, [1,1], 1>;
    242 defm : X86WriteRes<WriteFStoreNTX,     [ZnAGU], 1, [1], 1>;
    243 defm : X86WriteRes<WriteFStoreNTY,     [ZnAGU], 1, [1], 1>;
    244 defm : X86WriteRes<WriteFMaskedStore,  [ZnAGU,ZnFPU01], 4, [1,1], 1>;
    245 defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
    246 defm : X86WriteRes<WriteFMove,         [ZnFPU], 1, [1], 1>;
    247 defm : X86WriteRes<WriteFMoveX,        [ZnFPU], 1, [1], 1>;
    248 defm : X86WriteRes<WriteFMoveY,        [ZnFPU], 1, [1], 1>;
    249 
    250 defm : ZnWriteResFpuPair<WriteFAdd,      [ZnFPU0],  3>;
    251 defm : ZnWriteResFpuPair<WriteFAddX,     [ZnFPU0],  3>;
    252 defm : ZnWriteResFpuPair<WriteFAddY,     [ZnFPU0],  3>;
    253 defm : X86WriteResPairUnsupported<WriteFAddZ>;
    254 defm : ZnWriteResFpuPair<WriteFAdd64,    [ZnFPU0],  3>;
    255 defm : ZnWriteResFpuPair<WriteFAdd64X,   [ZnFPU0],  3>;
    256 defm : ZnWriteResFpuPair<WriteFAdd64Y,   [ZnFPU0],  3>;
    257 defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
    258 defm : ZnWriteResFpuPair<WriteFCmp,      [ZnFPU0],  3>;
    259 defm : ZnWriteResFpuPair<WriteFCmpX,     [ZnFPU0],  3>;
    260 defm : ZnWriteResFpuPair<WriteFCmpY,     [ZnFPU0],  3>;
    261 defm : X86WriteResPairUnsupported<WriteFCmpZ>;
    262 defm : ZnWriteResFpuPair<WriteFCmp64,    [ZnFPU0],  3>;
    263 defm : ZnWriteResFpuPair<WriteFCmp64X,   [ZnFPU0],  3>;
    264 defm : ZnWriteResFpuPair<WriteFCmp64Y,   [ZnFPU0],  3>;
    265 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
    266 defm : ZnWriteResFpuPair<WriteFCom,      [ZnFPU0],  3>;
    267 defm : ZnWriteResFpuPair<WriteFBlend,    [ZnFPU01], 1>;
    268 defm : ZnWriteResFpuPair<WriteFBlendY,   [ZnFPU01], 1>;
    269 defm : X86WriteResPairUnsupported<WriteFBlendZ>;
    270 defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
    271 defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
    272 defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
    273 defm : ZnWriteResFpuPair<WriteVarBlend,  [ZnFPU0],  1>;
    274 defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0],  1>;
    275 defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
    276 defm : ZnWriteResFpuPair<WriteCvtSS2I,   [ZnFPU3],  5>;
    277 defm : ZnWriteResFpuPair<WriteCvtPS2I,   [ZnFPU3],  5>;
    278 defm : ZnWriteResFpuPair<WriteCvtPS2IY,  [ZnFPU3],  5>;
    279 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
    280 defm : ZnWriteResFpuPair<WriteCvtSD2I,   [ZnFPU3],  5>;
    281 defm : ZnWriteResFpuPair<WriteCvtPD2I,   [ZnFPU3],  5>;
    282 defm : ZnWriteResFpuPair<WriteCvtPD2IY,  [ZnFPU3],  5>;
    283 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
    284 defm : ZnWriteResFpuPair<WriteCvtI2SS,   [ZnFPU3],  5>;
    285 defm : ZnWriteResFpuPair<WriteCvtI2PS,   [ZnFPU3],  5>;
    286 defm : ZnWriteResFpuPair<WriteCvtI2PSY,  [ZnFPU3],  5>;
    287 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
    288 defm : ZnWriteResFpuPair<WriteCvtI2SD,   [ZnFPU3],  5>;
    289 defm : ZnWriteResFpuPair<WriteCvtI2PD,   [ZnFPU3],  5>;
    290 defm : ZnWriteResFpuPair<WriteCvtI2PDY,  [ZnFPU3],  5>;
    291 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
    292 defm : ZnWriteResFpuPair<WriteFDiv,      [ZnFPU3], 15>;
    293 defm : ZnWriteResFpuPair<WriteFDivX,     [ZnFPU3], 15>;
    294 //defm : ZnWriteResFpuPair<WriteFDivY,     [ZnFPU3], 15>;
    295 defm : X86WriteResPairUnsupported<WriteFDivZ>;
    296 defm : ZnWriteResFpuPair<WriteFDiv64,    [ZnFPU3], 15>;
    297 defm : ZnWriteResFpuPair<WriteFDiv64X,   [ZnFPU3], 15>;
    298 //defm : ZnWriteResFpuPair<WriteFDiv64Y,   [ZnFPU3], 15>;
    299 defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
    300 defm : ZnWriteResFpuPair<WriteFSign,     [ZnFPU3],  2>;
    301 defm : ZnWriteResFpuPair<WriteFRnd,      [ZnFPU3],  4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
    302 defm : ZnWriteResFpuPair<WriteFRndY,     [ZnFPU3],  4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
    303 defm : X86WriteResPairUnsupported<WriteFRndZ>;
    304 defm : ZnWriteResFpuPair<WriteFLogic,    [ZnFPU],   1>;
    305 defm : ZnWriteResFpuPair<WriteFLogicY,   [ZnFPU],   1>;
    306 defm : X86WriteResPairUnsupported<WriteFLogicZ>;
    307 defm : ZnWriteResFpuPair<WriteFTest,     [ZnFPU],   1>;
    308 defm : ZnWriteResFpuPair<WriteFTestY,    [ZnFPU],   1>;
    309 defm : X86WriteResPairUnsupported<WriteFTestZ>;
    310 defm : ZnWriteResFpuPair<WriteFShuffle,  [ZnFPU12], 1>;
    311 defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
    312 defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
    313 defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
    314 defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
    315 defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
    316 defm : ZnWriteResFpuPair<WriteFMul,      [ZnFPU01], 3, [1], 1, 7, 1>;
    317 defm : ZnWriteResFpuPair<WriteFMulX,     [ZnFPU01], 3, [1], 1, 7, 1>;
    318 defm : ZnWriteResFpuPair<WriteFMulY,     [ZnFPU01], 4, [1], 1, 7, 1>;
    319 defm : X86WriteResPairUnsupported<WriteFMulZ>;
    320 defm : ZnWriteResFpuPair<WriteFMul64,    [ZnFPU01], 3, [1], 1, 7, 1>;
    321 defm : ZnWriteResFpuPair<WriteFMul64X,   [ZnFPU01], 3, [1], 1, 7, 1>;
    322 defm : ZnWriteResFpuPair<WriteFMul64Y,   [ZnFPU01], 4, [1], 1, 7, 1>;
    323 defm : X86WriteResPairUnsupported<WriteFMul64Z>;
    324 defm : ZnWriteResFpuPair<WriteFMA,       [ZnFPU03], 5>;
    325 defm : ZnWriteResFpuPair<WriteFMAX,      [ZnFPU03], 5>;
    326 defm : ZnWriteResFpuPair<WriteFMAY,      [ZnFPU03], 5>;
    327 defm : X86WriteResPairUnsupported<WriteFMAZ>;
    328 defm : ZnWriteResFpuPair<WriteFRcp,      [ZnFPU01], 5>;
    329 defm : ZnWriteResFpuPair<WriteFRcpX,     [ZnFPU01], 5>;
    330 defm : ZnWriteResFpuPair<WriteFRcpY,     [ZnFPU01], 5, [1], 1, 7, 2>;
    331 defm : X86WriteResPairUnsupported<WriteFRcpZ>;
    332 //defm : ZnWriteResFpuPair<WriteFRsqrt,    [ZnFPU02], 5>;
    333 defm : ZnWriteResFpuPair<WriteFRsqrtX,   [ZnFPU01], 5, [1], 1, 7, 1>;
    334 //defm : ZnWriteResFpuPair<WriteFRsqrtY,   [ZnFPU01], 5, [2], 2>;
    335 defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
    336 defm : ZnWriteResFpuPair<WriteFSqrt,     [ZnFPU3], 20, [20]>;
    337 defm : ZnWriteResFpuPair<WriteFSqrtX,    [ZnFPU3], 20, [20]>;
    338 defm : ZnWriteResFpuPair<WriteFSqrtY,    [ZnFPU3], 28, [28], 1, 7, 1>;
    339 defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
    340 defm : ZnWriteResFpuPair<WriteFSqrt64,   [ZnFPU3], 20, [20]>;
    341 defm : ZnWriteResFpuPair<WriteFSqrt64X,  [ZnFPU3], 20, [20]>;
    342 defm : ZnWriteResFpuPair<WriteFSqrt64Y,  [ZnFPU3], 40, [40], 1, 7, 1>;
    343 defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
    344 defm : ZnWriteResFpuPair<WriteFSqrt80,   [ZnFPU3], 20, [20]>;
    345 
    346 // Vector integer operations which use the FPU units
    347 defm : X86WriteRes<WriteVecLoad,         [ZnAGU], 8, [1], 1>;
    348 defm : X86WriteRes<WriteVecLoadX,        [ZnAGU], 8, [1], 1>;
    349 defm : X86WriteRes<WriteVecLoadY,        [ZnAGU], 8, [1], 1>;
    350 defm : X86WriteRes<WriteVecLoadNT,       [ZnAGU], 8, [1], 1>;
    351 defm : X86WriteRes<WriteVecLoadNTY,      [ZnAGU], 8, [1], 1>;
    352 defm : X86WriteRes<WriteVecMaskedLoad,   [ZnAGU,ZnFPU01], 8, [1,2], 2>;
    353 defm : X86WriteRes<WriteVecMaskedLoadY,  [ZnAGU,ZnFPU01], 9, [1,3], 2>;
    354 defm : X86WriteRes<WriteVecStore,        [ZnAGU], 1, [1], 1>;
    355 defm : X86WriteRes<WriteVecStoreX,       [ZnAGU], 1, [1], 1>;
    356 defm : X86WriteRes<WriteVecStoreY,       [ZnAGU], 1, [1], 1>;
    357 defm : X86WriteRes<WriteVecStoreNT,      [ZnAGU], 1, [1], 1>;
    358 defm : X86WriteRes<WriteVecStoreNTY,     [ZnAGU], 1, [1], 1>;
    359 defm : X86WriteRes<WriteVecMaskedStore,  [ZnAGU,ZnFPU01], 4, [1,1], 1>;
    360 defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
    361 defm : X86WriteRes<WriteVecMove,         [ZnFPU], 1, [1], 1>;
    362 defm : X86WriteRes<WriteVecMoveX,        [ZnFPU], 1, [1], 1>;
    363 defm : X86WriteRes<WriteVecMoveY,        [ZnFPU], 2, [1], 2>;
    364 defm : X86WriteRes<WriteVecMoveToGpr,    [ZnFPU2], 2, [1], 1>;
    365 defm : X86WriteRes<WriteVecMoveFromGpr,  [ZnFPU2], 3, [1], 1>;
    366 defm : X86WriteRes<WriteEMMS,            [ZnFPU], 2, [1], 1>;
    367 
    368 defm : ZnWriteResFpuPair<WriteVecShift,   [ZnFPU],   1>;
    369 defm : ZnWriteResFpuPair<WriteVecShiftX,  [ZnFPU2],  1>;
    370 defm : ZnWriteResFpuPair<WriteVecShiftY,  [ZnFPU2],  2>;
    371 defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
    372 defm : ZnWriteResFpuPair<WriteVecShiftImm,  [ZnFPU], 1>;
    373 defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
    374 defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
    375 defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
    376 defm : ZnWriteResFpuPair<WriteVecLogic,   [ZnFPU],   1>;
    377 defm : ZnWriteResFpuPair<WriteVecLogicX,  [ZnFPU],   1>;
    378 defm : ZnWriteResFpuPair<WriteVecLogicY,  [ZnFPU],   1>;
    379 defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
    380 defm : ZnWriteResFpuPair<WriteVecTest,    [ZnFPU12], 1, [2], 1, 7, 1>;
    381 defm : ZnWriteResFpuPair<WriteVecTestY,   [ZnFPU12], 1, [2], 1, 7, 1>;
    382 defm : X86WriteResPairUnsupported<WriteVecTestZ>;
    383 defm : ZnWriteResFpuPair<WriteVecALU,     [ZnFPU],   1>;
    384 defm : ZnWriteResFpuPair<WriteVecALUX,    [ZnFPU],   1>;
    385 defm : ZnWriteResFpuPair<WriteVecALUY,    [ZnFPU],   1>;
    386 defm : X86WriteResPairUnsupported<WriteVecALUZ>;
    387 defm : ZnWriteResFpuPair<WriteVecIMul,    [ZnFPU0],  4>;
    388 defm : ZnWriteResFpuPair<WriteVecIMulX,   [ZnFPU0],  4>;
    389 defm : ZnWriteResFpuPair<WriteVecIMulY,   [ZnFPU0],  4>;
    390 defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
    391 defm : ZnWriteResFpuPair<WritePMULLD,     [ZnFPU0],  4, [1], 1, 7, 1>; // FIXME
    392 defm : ZnWriteResFpuPair<WritePMULLDY,    [ZnFPU0],  5, [2], 1, 7, 1>; // FIXME
    393 defm : X86WriteResPairUnsupported<WritePMULLDZ>;
    394 defm : ZnWriteResFpuPair<WriteShuffle,    [ZnFPU],   1>;
    395 defm : ZnWriteResFpuPair<WriteShuffleX,   [ZnFPU],   1>;
    396 defm : ZnWriteResFpuPair<WriteShuffleY,   [ZnFPU],   1>;
    397 defm : X86WriteResPairUnsupported<WriteShuffleZ>;
    398 defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU],   1>;
    399 defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU],   1>;
    400 defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU],   1>;
    401 defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
    402 defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU01], 1>;
    403 defm : ZnWriteResFpuPair<WriteBlendY,     [ZnFPU01], 1>;
    404 defm : X86WriteResPairUnsupported<WriteBlendZ>;
    405 defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;
    406 defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2>;
    407 defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>;
    408 defm : ZnWriteResFpuPair<WritePSADBWX,    [ZnFPU0],  3>;
    409 defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3>;
    410 defm : X86WriteResPairUnsupported<WritePSADBWZ>;
    411 defm : ZnWriteResFpuPair<WritePHMINPOS,   [ZnFPU0],  4>;
    412 
    413 // Vector Shift Operations
    414 defm : ZnWriteResFpuPair<WriteVarVecShift,  [ZnFPU12], 1>;
    415 defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
    416 defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
    417 
    418 // Vector insert/extract operations.
    419 defm : ZnWriteResFpuPair<WriteVecInsert,   [ZnFPU],   1>;
    420 
    421 def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
    422   let Latency = 2;
    423   let ResourceCycles = [1, 2];
    424 }
    425 def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
    426   let Latency = 5;
    427   let NumMicroOps = 2;
    428   let ResourceCycles = [1, 2, 3];
    429 }
    430 
    431 // MOVMSK Instructions.
    432 def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
    433 def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
    434 def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
    435 
    436 def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> {
    437   let NumMicroOps = 2;
    438   let Latency = 2;
    439   let ResourceCycles = [2];
    440 }
    441 
    442 // AES Instructions.
    443 defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
    444 defm : ZnWriteResFpuPair<WriteAESIMC,    [ZnFPU01], 4>;
    445 defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;
    446 
    447 def : WriteRes<WriteFence,  [ZnAGU]>;
    448 def : WriteRes<WriteNop, []>;
    449 
    450 // Following instructions with latency=100 are microcoded.
    451 // We set long latency so as to block the entire pipeline.
    452 defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
    453 defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>;
    454 
    455 // Microcoded Instructions
    456 def ZnWriteMicrocoded : SchedWriteRes<[]> {
    457   let Latency = 100;
    458 }
    459 
    460 def : SchedAlias<WriteMicrocoded, ZnWriteMicrocoded>;
    461 def : SchedAlias<WriteFCMOV, ZnWriteMicrocoded>;
    462 def : SchedAlias<WriteSystem, ZnWriteMicrocoded>;
    463 def : SchedAlias<WriteMPSAD, ZnWriteMicrocoded>;
    464 def : SchedAlias<WriteMPSADY, ZnWriteMicrocoded>;
    465 def : SchedAlias<WriteMPSADLd, ZnWriteMicrocoded>;
    466 def : SchedAlias<WriteMPSADYLd, ZnWriteMicrocoded>;
    467 def : SchedAlias<WriteCLMul, ZnWriteMicrocoded>;
    468 def : SchedAlias<WriteCLMulLd, ZnWriteMicrocoded>;
    469 def : SchedAlias<WritePCmpIStrM, ZnWriteMicrocoded>;
    470 def : SchedAlias<WritePCmpIStrMLd, ZnWriteMicrocoded>;
    471 def : SchedAlias<WritePCmpEStrI, ZnWriteMicrocoded>;
    472 def : SchedAlias<WritePCmpEStrILd, ZnWriteMicrocoded>;
    473 def : SchedAlias<WritePCmpEStrM, ZnWriteMicrocoded>;
    474 def : SchedAlias<WritePCmpEStrMLd, ZnWriteMicrocoded>;
    475 def : SchedAlias<WritePCmpIStrI, ZnWriteMicrocoded>;
    476 def : SchedAlias<WritePCmpIStrILd, ZnWriteMicrocoded>;
    477 def : SchedAlias<WriteLDMXCSR, ZnWriteMicrocoded>;
    478 def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
    479 
    480 //=== Regex based InstRW ===//
    481 // Notation:
    482 // - r: register.
    483 // - m = memory.
    484 // - i = immediate
    485 // - mm: 64 bit mmx register.
    486 // - x = 128 bit xmm register.
    487 // - (x)mm = mmx or xmm register.
    488 // - y = 256 bit ymm register.
    489 // - v = any vector register.
    490 
    491 //=== Integer Instructions ===//
    492 //-- Move instructions --//
    493 // MOV.
    494 // r16,m.
    495 def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
    496 
    497 // MOVSX, MOVZX.
    498 // r,m.
    499 def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
    500 
    501 // XCHG.
    502 // r,r.
    503 def ZnWriteXCHG : SchedWriteRes<[ZnALU]> {
    504   let NumMicroOps = 2;
    505   let ResourceCycles = [2];
    506 }
    507 
    508 def : InstRW<[ZnWriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
    509 
    510 // r,m.
    511 def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
    512   let Latency = 5;
    513   let NumMicroOps = 2;
    514 }
    515 def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
    516 
    517 def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
    518 
    519 // POP16.
    520 // r.
    521 def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
    522   let Latency = 5;
    523   let NumMicroOps = 2;
    524 }
    525 def : InstRW<[ZnWritePop16r], (instregex "POP16rmm")>;
    526 def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
    527 def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
    528 
    529 
    530 // PUSH.
    531 // r. Has default values.
    532 // m.
    533 def ZnWritePUSH : SchedWriteRes<[ZnAGU]>{
    534   let Latency = 4;
    535 }
    536 def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;
    537 
    538 //PUSHF
    539 def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
    540 
    541 // PUSHA.
    542 def ZnWritePushA : SchedWriteRes<[ZnAGU]> {
    543   let Latency = 8;
    544 }
    545 def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;
    546 
    547 //LAHF
    548 def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
    549 
    550 // MOVBE.
    551 // r,m.
    552 def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> {
    553   let Latency = 5;
    554 }
    555 def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
    556 
    557 // m,r.
    558 def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
    559 
    560 //-- Arithmetic instructions --//
    561 
    562 // ADD SUB.
    563 // m,r/i.
    564 def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
    565                           "(ADD|SUB)(8|16|32|64)mi8",
    566                           "(ADD|SUB)64mi32")>;
    567 
    568 // ADC SBB.
    569 // m,r/i.
    570 def : InstRW<[WriteALULd],
    571              (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
    572               "(ADC|SBB)(16|32|64)mi8",
    573               "(ADC|SBB)64mi32")>;
    574 
    575 // INC DEC NOT NEG.
    576 // m.
    577 def : InstRW<[WriteALULd],
    578              (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
    579 
    580 // MUL IMUL.
    581 // r16.
    582 def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    583   let Latency = 3;
    584 }
    585 def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>;
    586 def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right?
    587 def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
    588 
    589 // m16.
    590 def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    591   let Latency = 8;
    592 }
    593 def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>;
    594 
    595 // r32.
    596 def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    597   let Latency = 3;
    598 }
    599 def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>;
    600 def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right?
    601 def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
    602 
    603 // m32.
    604 def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    605   let Latency = 8;
    606 }
    607 def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>;
    608 
    609 // r64.
    610 def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    611   let Latency = 4;
    612   let NumMicroOps = 2;
    613 }
    614 def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>;
    615 def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right?
    616 def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
    617 
    618 // m64.
    619 def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    620   let Latency = 9;
    621   let NumMicroOps = 2;
    622 }
    623 def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>;
    624 
    625 // MULX.
    626 // r32,r32,r32.
    627 def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    628   let Latency = 3;
    629   let ResourceCycles = [1, 2];
    630 }
    631 def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;
    632 
    633 // r32,r32,m32. One ZnAGU cycle for the load, then the r32 form's [1, 2].
    634 def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    635   let Latency = 8;
    636   let ResourceCycles = [1, 1, 2];
    637 }
    638 def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
    639 
    640 // r64,r64,r64. Occupies ZnMultiplier as well as ZnALU1, like the m64 form.
    641 def ZnWriteMulX64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    642   let Latency = 3;
    643 }
    644 def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;
    645 
    646 // r64,r64,m64.
    647 def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    648   let Latency = 8;
    649 }
    650 def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
    651 
    652 //-- Control transfer instructions --//
    653 
    654 // J(E|R)CXZ: issued on the ZnALU03 group with no latency/micro-op overrides.
    655 def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
    656 def : InstRW<[ZnWriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;
    657 
    658 // INTO: modeled as microcoded.
    659 def : InstRW<[WriteMicrocoded], (instrs INTO)>;
    660 
    661 // LOOP.
    662 def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
    663 def : InstRW<[ZnWriteLOOP], (instrs LOOP)>;
    664 
    665 // LOOP(N)E, LOOP(N)Z: same resources as LOOP, kept as a separate write.
    666 def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
    667 def : InstRW<[ZnWriteLOOPE], (instrs LOOPE, LOOPNE)>;
    668 
    669 // CALL.
    670 // r: register-indirect call (AGU + ZnALU03).
    671 def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
    672 def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;
    673 
    674 def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>; // m: memory-indirect call, modeled as microcoded.
    675 
    676 // RET, LRET, IRET: all three families share one 2-micro-op write.
    677 def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
    678   let NumMicroOps = 2;
    679 }
    680 def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
    681                             "IRET(16|32|64)")>;
    682 
    683 //-- Logic instructions --//
    684 
    685 // AND OR XOR.
    686 // m,r / m,i (including the mi8 and 64mi32 immediate forms).
    687 def : InstRW<[WriteALULd],
    688              (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
    689               "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
    690 
    691 // ALU writes with latency 2 (register) and latency 6 (AGU + ALU, memory operand).
    692 def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> {
    693   let Latency = 2;
    694 }
    695 def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
    696   let Latency = 6;
    697 }
    698 
    699 // BT.
    700 // m,i8.
    701 def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
    702 
    703 // BTR BTS BTC.
    704 // r,r / r,i8: register destination, 2 micro-ops.
    705 def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> {
    706   let Latency = 2;
    707   let NumMicroOps = 2;
    708 }
    709 def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
    710 
    711 // m,r / m,i8: memory destination, adds the AGU and raises latency to 6.
    712 def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
    713   let Latency = 6;
    714   let NumMicroOps = 2;
    715 }
    716 // Bind the memory-destination BTR/BTS/BTC forms to the write above.
    717 def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;
    718 
    719 // BLSI BLSMSK BLSR.
    720 // r,r.
    721 def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
    722 // r,m.
    723 def : InstRW<[ZnWriteALULat2Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
    724 
    725 // CLD STD.
    726 def : InstRW<[WriteALU], (instrs STD, CLD)>;
    727 
    728 // PDEP PEXT: both register and memory forms are modeled as microcoded.
    729 // r,r,r.
    730 def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
    731 // r,r,m.
    732 def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
    733 
    734 // RCR RCL.
    735 // m,1 / m,i / m,CL.
    736 def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
    737 
    738 // SHR SHL SAR.
    739 // m,i / m,1.
    740 def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
    741 
    742 // SHRD SHLD.
    743 // m,r,i8.
    744 def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
    745 
    746 // r,r,cl.
    747 def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
    748 
    749 // m,r,cl.
    750 def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
    751 
    752 //-- Misc instructions --//
    753 // CMPXCHG (rm forms): AGU + ALU, 5 micro-ops, latency 8.
    754 def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {
    755   let Latency = 8;
    756   let NumMicroOps = 5;
    757 }
    758 def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
    759 
    760 // CMPXCHG8B: 18 micro-ops; no Latency override is given here.
    761 def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
    762   let NumMicroOps = 18;
    763 }
    764 def : InstRW<[ZnWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
    765 
    766 def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>; // CMPXCHG16B: modeled as microcoded.
    767 
    768 // LEAVE: ALU + AGU, 2 micro-ops, latency 8.
    769 def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> {
    770   let Latency = 8;
    771   let NumMicroOps = 2;
    772 }
    773 def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>;
    774 
    775 // PAUSE.
    776 def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;
    777 
    778 // RDTSC (regex, so any opcode name containing RDTSC is matched).
    779 def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
    780 
    781 // RDPMC.
    782 def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
    783 
    784 // RDRAND.
    785 def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
    786 
    787 // XGETBV.
    788 def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
    789 
    790 //-- String instructions --//
    791 // CMPS: every string instruction in this section is modeled as microcoded.
    792 def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
    793 
    794 // LODSB/W.
    795 def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
    796 
    797 // LODSD/Q (matched through the LODSL / LODSQ opcode names).
    798 def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
    799 
    800 // MOVS.
    801 def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
    802 
    803 // SCAS.
    804 def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
    805 
    806 // STOS.
    807 def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
    808 
    809 // XADD (rm forms only).
    810 def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
    811 
    812 //=== Floating Point x87 Instructions ===//
    813 //-- Move instructions --//
    814 
    815 def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]> ; // register FLD; no latency/micro-op overrides.
    816 
    817 def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> { // register FST/FSTP.
    818   let Latency = 5;
    819   let NumMicroOps = 2;
    820 }
    821 
    822 // LD_F.
    823 // r.
    824 def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;
    825 
    826 // m80: 2 micro-ops; no Latency override is given here.
    827 def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
    828   let NumMicroOps = 2;
    829 }
    830 def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;
    831 
    832 // FBLD.
    833 def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
    834 
    835 // FST(P).
    836 // r.
    837 def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
    838 
    839 // m80 (only ST_FP80m is matched).
    840 def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> {
    841   let Latency = 5;
    842 }
    843 def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;
    844 
    845 // FBSTP.
    846 // m80.
    847 def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
    848 
    849 def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>; // may issue on the whole ZnFPU group.
    850 
    851 // FXCHG.
    852 def : InstRW<[ZnWriteFXCH], (instrs XCH_F)>;
    853 
    854 // FILD: AGU + FPU3, 2 micro-ops, latency 11.
    855 def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> {
    856   let Latency = 11;
    857   let NumMicroOps = 2;
    858 }
    859 def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>;
    860 
    861 // FIST(P) FISTTP.
    862 def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> {
    863   let Latency = 12;
    864 }
    865 def : InstRW<[ZnWriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
    866 
    867 def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> { // used for FLDZ below.
    868   let Latency = 8;
    869 }
    870 
    871 def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> { // used for FLD1, the FLDC constants, FINCSTP/FDECSTP and FFREE below.
    872   let Latency = 11;
    873 }
    874 
    875 // FLDZ.
    876 def : SchedAlias<WriteFLD0, ZnWriteFPU13>;
    877 
    878 // FLD1.
    879 def : SchedAlias<WriteFLD1, ZnWriteFPU3>;
    880 
    881 // FLDPI FLDL2E etc.
    882 def : SchedAlias<WriteFLDC, ZnWriteFPU3>;
    883 
    884 // FNSTSW.
    885 // AX.
    886 def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;
    887 
    888 // m16.
    889 def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;
    890 
    891 // FLDCW.
    892 def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;
    893 
    894 // FNSTCW.
    895 def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;
    896 
    897 // FINCSTP FDECSTP. NOTE(review): ZnWriteFPU3 also reserves ZnAGU although these take no memory operand - confirm.
    898 def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>;
    899 
    900 // FFREE.
    901 def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
    902 
    903 // FNSAVE (matched through the FSAVEm opcode name).
    904 def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
    905 
    906 // FRSTOR.
    907 def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
    908 
    909 //-- Arithmetic instructions --//
    910 
    911 def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ; // FPU3 only, no Latency override.
    912 
    913 def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ; // FPU0 only, no Latency override.
    914 
    915 def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { // load form of the FPU0 write.
    916   let Latency = 8;
    917 }
    918 
    919 // FCHS.
    920 def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;
    921 
    922 // FCOM(P) FUCOM(P).
    923 // r.
    924 def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
    925 // m.
    926 def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
    927 
    928 // FCOMPP FUCOMPP.
    929 // r.
    930 def : InstRW<[ZnWriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
    931 
    932 def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]>
    933 {
    934   let Latency = 9;
    935 }
    936 
    937 // FCOMI(P) FUCOMI(P).
    938 // r. NOTE(review): the listed opcodes are register forms, yet ZnWriteFPU02 also reserves ZnAGU - confirm.
    939 def : InstRW<[ZnWriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
    940 
    941 def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]>
    942 {
    943   let Latency = 12;
    944   let NumMicroOps = 2;
    945   let ResourceCycles = [1,3];
    946 }
    947 
    948 // FICOM(P): memory integer compare; AGU for 1 cycle, ZnFPU03 for 3 cycles.
    949 def : InstRW<[ZnWriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
    950 
    951 // FTST.
    952 def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>;
    953 
    954 // FXAM.
    955 def : InstRW<[ZnWriteFPU3Lat1], (instrs FXAM)>;
    956 
    957 // FPREM.
    958 def : InstRW<[WriteMicrocoded], (instrs FPREM)>;
    959 
    960 // FPREM1.
    961 def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;
    962 
    963 // FRNDINT.
    964 def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;
    965 
    966 // FSCALE.
    967 def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;
    968 
    969 // FXTRACT.
    970 def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;
    971 
    972 // FNOP.
    973 def : InstRW<[ZnWriteFPU0Lat1], (instrs FNOP)>;
    974 
    975 // WAIT.
    976 def : InstRW<[ZnWriteFPU0Lat1], (instrs WAIT)>;
    977 
    978 // FNCLEX.
    979 def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;
    980 
    981 // FNINIT.
    982 def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
    983 
    984 //=== Integer MMX and XMM Instructions ===//
    985 
    986 // PACKSSWB/DW.
    987 // mm <- mm. Register-form writes on ZnFPU12; the Y variant is 2 micro-ops.
    988 def ZnWriteFPU12 : SchedWriteRes<[ZnFPU12]> ;
    989 def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> {
    990   let NumMicroOps = 2;
    991 }
    992 def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> { let Latency = 8; } // load form: carries the same 8-cycle latency as ZnWriteFPU12Ym instead of the class default.
    993 def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
    994   let Latency = 8;
    995   let NumMicroOps = 2;
    996 }
    997 
    998 def : InstRW<[ZnWriteFPU12], (instrs MMX_PACKSSDWirr,
    999                                      MMX_PACKSSWBirr,
   1000                                      MMX_PACKUSWBirr)>;
   1001 def : InstRW<[ZnWriteFPU12m], (instrs MMX_PACKSSDWirm,
   1002                                       MMX_PACKSSWBirm,
   1003                                       MMX_PACKUSWBirm)>;
   1004 
   1005 // VPMOVSX/ZX BW BD BQ WD WQ DQ.
   1006 // y <- x (register) and y <- m (load) forms.
   1007 def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
   1008 def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;
   1009 
   1010 def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ; // xmm register form.
   1011 def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> { // ymm register form.
   1012   let Latency = 2;
   1013 }
   1014 def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> { // same values as ZnWriteFPU013Ld; used by VEXTRACTF128mr.
   1015   let Latency = 8;
   1016   let NumMicroOps = 2;
   1017 }
   1018 def ZnWriteFPU013Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> { // xmm load form.
   1019   let Latency = 8;
   1020   let NumMicroOps = 2;
   1021 }
   1022 def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> { // ymm load form.
   1023   let Latency = 9;
   1024   let NumMicroOps = 2;
   1025 }
   1026 
   1027 // PBLENDW.
   1028 // x,x,i / v,v,v,i
   1029 def : InstRW<[ZnWriteFPU013], (instregex "(V?)PBLENDWrri")>;
   1030 // ymm
   1031 def : InstRW<[ZnWriteFPU013Y], (instrs VPBLENDWYrri)>;
   1032 
   1033 // x,m,i / v,v,m,i
   1034 def : InstRW<[ZnWriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
   1035 // y,m,i
   1036 def : InstRW<[ZnWriteFPU013LdY], (instrs VPBLENDWYrmi)>;
   1037 
   1038 def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]> ; // xmm register form.
   1039 def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> { // ymm register form, 2 micro-ops.
   1040   let NumMicroOps = 2;
   1041 }
   1042 
   1043 // VPBLENDD.
   1044 // v,v,v,i.
   1045 def : InstRW<[ZnWriteFPU01], (instrs VPBLENDDrri)>;
   1046 // ymm
   1047 def : InstRW<[ZnWriteFPU01Y], (instrs VPBLENDDYrri)>;
   1048 
   1049 // v,v,m,i: load forms; ZnFPU01 is held for 2 cycles (xmm) or 3 cycles (ymm).
   1050 def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
   1051   let NumMicroOps = 2;
   1052   let Latency = 8;
   1053   let ResourceCycles = [1, 2];
   1054 }
   1055 def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
   1056   let NumMicroOps = 2;
   1057   let Latency = 9;
   1058   let ResourceCycles = [1, 3];
   1059 }
   1060 def : InstRW<[ZnWriteFPU01Op2], (instrs VPBLENDDrmi)>;
   1061 def : InstRW<[ZnWriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
   1062 
   1063 // MASKMOVQ.
   1064 def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
   1065 
   1066 // MASKMOVDQU.
   1067 def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
   1068 
   1069 // VPMASKMOVD.
   1070 // v,v,m (xmm and ymm loads). NOTE(review): only VPMASKMOVD is matched here while the store regex below covers D and Q - confirm the Q loads are intentionally left out.
   1071 def : InstRW<[WriteMicrocoded],
   1072                                (instregex "VPMASKMOVD(Y?)rm")>;
   1073 // m, v,v.
   1074 def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
   1075 
   1076 // VPBROADCAST B/W.
   1077 // x, m8/16: AGU for 1 cycle, ZnFPU12 for 2 cycles.
   1078 def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
   1079   let Latency = 8;
   1080   let NumMicroOps = 2;
   1081   let ResourceCycles = [1, 2];
   1082 }
   1083 def : InstRW<[ZnWriteVPBROADCAST128Ld],
   1084                                      (instregex "VPBROADCAST(B|W)rm")>;
   1085 
   1086 // y, m8/16: same latency and micro-ops as the xmm form, but restricted to ZnFPU1.
   1087 def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
   1088   let Latency = 8;
   1089   let NumMicroOps = 2;
   1090   let ResourceCycles = [1, 2];
   1091 }
   1092 def : InstRW<[ZnWriteVPBROADCAST256Ld],
   1093                                      (instregex "VPBROADCAST(B|W)Yrm")>;
   1094 
   1095 // VPGATHER: all D/Q index and element combinations, xmm and ymm, are microcoded.
   1096 def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
   1097 
   1098 //-- Arithmetic instructions --//
   1099 
   1100 // Integer horizontal add/sub: every WritePHAdd* class is aliased to the microcoded write.
   1101 // PHADD|PHSUB (S) W/D.
   1102 def : SchedAlias<WritePHAdd,    ZnWriteMicrocoded>;
   1103 def : SchedAlias<WritePHAddLd,  ZnWriteMicrocoded>;
   1104 def : SchedAlias<WritePHAddX,   ZnWriteMicrocoded>;
   1105 def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
   1106 def : SchedAlias<WritePHAddY,   ZnWriteMicrocoded>;
   1107 def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;
   1108 
   1109 // PCMPGTQ: register forms (xmm and ymm) share one write on ZnFPU03.
   1110 def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
   1111 def : InstRW<[ZnWritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
   1112 
   1113 // x <- x,m.
   1114 def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
   1115   let Latency = 8;
   1116 }
   1117 // ymm load form: 2 micro-ops, ZnFPU03 held for 2 cycles.
   1118 def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
   1119   let Latency = 8;
   1120   let NumMicroOps = 2;
   1121   let ResourceCycles = [1,2];
   1122 }
   1123 def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
   1124 def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
   1125 
   1126 //-- Logic instructions --//
   1127 
   1128 // PSLL,PSRL,PSRA W/D/Q.
   1129 // x,x / v,v,x. Shift writes on ZnFPU2; the ymm variant has latency 2.
   1130 def ZnWritePShift  : SchedWriteRes<[ZnFPU2]> ;
   1131 def ZnWritePShiftY : SchedWriteRes<[ZnFPU2]> {
   1132   let Latency = 2;
   1133 }
   1134 
   1135 // PSLL,PSRL DQ (immediate byte shifts), xmm then ymm.
   1136 def : InstRW<[ZnWritePShift], (instregex "(V?)PS(R|L)LDQri")>;
   1137 def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
   1138 
   1139 //=== Floating Point XMM and YMM Instructions ===//
   1140 //-- Move instructions --//
   1141 
   1142 // VPERM2F128: register and memory forms are both microcoded.
   1143 def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
   1144 def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
   1145 
   1146 def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> { // load + broadcast, 2 micro-ops.
   1147   let NumMicroOps = 2;
   1148   let Latency = 8;
   1149 }
   1150 // VBROADCASTF128.
   1151 def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128)>;
   1152 
   1153 // EXTRACTPS.
   1154 // r32,x,i: ZnFPU12 for 1 cycle, ZnFPU2 for 2 cycles.
   1155 def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
   1156   let Latency = 2;
   1157   let NumMicroOps = 2;
   1158   let ResourceCycles = [1, 2];
   1159 }
   1160 def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
   1161 
   1162 def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> { // NOTE(review): AGU is reserved for 5 cycles (equal to Latency); other writes here use 1 - confirm.
   1163   let Latency = 5;
   1164   let NumMicroOps = 2;
   1165   let ResourceCycles = [5, 1, 2];
   1166 }
   1167 // m32,x,i.
   1168 def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
   1169 
   1170 // VEXTRACTF128.
   1171 // x,y,i.
   1172 def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr)>;
   1173 
   1174 // m128,y,i.
   1175 def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr)>;
   1176 
   1177 def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> { // register insert: ZnFPU013 held for 2 cycles.
   1178   let Latency = 2;
   1179   let ResourceCycles = [2];
   1180 }
   1181 def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> { // load form of the insert.
   1182   let Latency = 9;
   1183   let NumMicroOps = 2;
   1184   let ResourceCycles = [1, 2];
   1185 }
   1186 // VINSERTF128.
   1187 // y,y,x,i / y,y,m128,i.
   1188 def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr)>;
   1189 def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm)>;
   1190 
   1191 // VGATHER: all index/element combinations, xmm and ymm, are microcoded.
   1192 def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
   1193 
   1194 //-- Conversion instructions --//
   1195 def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> { // xmm register form.
   1196   let Latency = 4;
   1197 }
   1198 def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> { // ymm register form.
   1199   let Latency = 5;
   1200 }
   1201 
   1202 // CVTPD2PS.
   1203 // x,x.
   1204 def : SchedAlias<WriteCvtPD2PS,  ZnWriteCVTPD2PSr>;
   1205 // y,y.
   1206 def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
   1207 // z,z: 512-bit form is marked unsupported on this model.
   1208 defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
   1209 
   1210 def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> { // NOTE(review): uses ZnFPU03 while the register and ymm-load forms use ZnFPU3 - confirm.
   1211   let Latency = 11;
   1212   let NumMicroOps = 2;
   1213   let ResourceCycles = [1,2];
   1214 }
   1215 // x,m128.
   1216 def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
   1217 
   1218 // x,m256.
   1219 def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   1220   let Latency = 11;
   1221 }
   1222 def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
   1223 // z,m512: unsupported on this model.
   1224 defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
   1225 
   1226 // CVTSD2SS.
   1227 // x,x.
   1228 // Reuses ZnWriteCVTPD2PSr.
   1229 def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;
   1230 
   1231 // x,m64: reuses ZnWriteCVTPD2PSLd.
   1232 def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;
   1233 
   1234 // CVTPS2PD.
   1235 // x,x.
   1236 def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> {
   1237   let Latency = 3;
   1238 }
   1239 def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;
   1240 
   1241 // x,m64.
   1242 // y,m128. Both load widths share one write; the 512-bit load form is unsupported.
   1243 def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   1244   let Latency = 10;
   1245   let NumMicroOps = 2;
   1246 }
   1247 def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
   1248 def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
   1249 defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
   1250 
   1251 // y,x: same resource and latency as the xmm form; the 512-bit form is unsupported.
   1252 def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
   1253   let Latency = 3;
   1254 }
   1255 def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
   1256 defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
   1257 
   1258 // CVTSS2SD.
   1259 // x,x.
   1260 def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> {
   1261   let Latency = 4;
   1262 }
   1263 def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
   1264 
   1265 // x,m32: AGU for 1 cycle, ZnFPU3 for 2 cycles.
   1266 def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   1267   let Latency = 11;
   1268   let NumMicroOps = 2;
   1269   let ResourceCycles = [1, 2];
   1270 }
   1271 def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
   1272 
   1273 def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> { // int32 -> double conversion write, shared by the xmm and ymm register forms.
   1274   let Latency = 5;
   1275 }
   1276 // CVTDQ2PD.
   1277 // x,x.
   1278 def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
   1279 
   1280 // Same write as the xmm form.
   1281 // y,x.
   1282 def : InstRW<[ZnWriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
   1283 
   1284 def ZnWriteCVTPD2DQr: SchedWriteRes<[ZnFPU12, ZnFPU3]> { // double -> int32 conversion write (register forms).
   1285   let Latency = 5;
   1286 }
   1287 // CVT(T)PD2DQ.
   1288 // x,x. Bound to the CVTPD2DQ write defined just above, matching the ymm register form.
   1289 def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
   1290 
   1291 def ZnWriteCVTPD2DQLd: SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> { // load form, shared by the xmm and ymm CVT(T)PD2DQ and by CVT(T)SS2SI below.
   1292   let Latency = 12;
   1293   let NumMicroOps = 2;
   1294 }
   1295 // x,m128.
   1296 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
   1297 // same as xmm handling
   1298 // x,y.
   1299 def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
   1300 // x,m256.
   1301 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
   1302 
   1303 def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]> { // shared by the MMX CVT(T)PS2PI and CVT(T)PD2PI register forms.
   1304   let Latency = 4;
   1305 }
   1306 // CVT(T)PS2PI.
   1307 // mm,x.
   1308 def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
   1309 
   1310 // CVTPI2PD.
   1311 // x,mm: reuses the CVTPS2PD register write.
   1312 def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
   1313 
   1314 // CVT(T)PD2PI.
   1315 // mm,x.
   1316 def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
   1317 
   1318 def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]> { // NOTE(review): not referenced by any InstRW in this part of the file - confirm it is still needed.
   1319   let Latency = 5;
   1320 }
   1321 
   1322 // same as CVTPD2DQr
   1323 // CVT(T)SS2SI.
   1324 // r32/64,x.
   1325 def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
   1326 // same as CVTPD2DQLd
   1327 // r32/64,m32.
   1328 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
   1329 
   1330 def ZnWriteCVSTSI2SDr: SchedWriteRes<[ZnFPU013, ZnFPU3]> { // integer register -> scalar double.
   1331   let Latency = 5;
   1332 }
   1333 // CVTSI2SD.
   1334 // x,r32/64.
   1335 def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
   1336 
   1337 
   1338 def ZnWriteCVSTSI2SIr: SchedWriteRes<[ZnFPU3, ZnFPU2]> { // despite the name, bound to CVT(T)SD2SI register forms below.
   1339   let Latency = 5;
   1340 }
   1341 def ZnWriteCVSTSI2SILd: SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> { // load form of the write above.
   1342   let Latency = 12;
   1343 }
   1344 // CVT(T)SD2SI.
   1345 // r32/64,x.
   1346 def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
   1347 // r32/64,m64.
   1348 def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
   1349 
   1350 // VCVTPS2PH: xmm/ymm forms are aliased to the microcoded write; 512-bit forms are unsupported.
   1351 // x,v,i.
   1352 def : SchedAlias<WriteCvtPS2PH,    ZnWriteMicrocoded>;
   1353 def : SchedAlias<WriteCvtPS2PHY,   ZnWriteMicrocoded>;
   1354 defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
   1355 // m,v,i.
   1356 def : SchedAlias<WriteCvtPS2PHSt,  ZnWriteMicrocoded>;
   1357 def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
   1358 defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
   1359 
   1360 // VCVTPH2PS: same treatment as VCVTPS2PH.
   1361 // v,x.
   1362 def : SchedAlias<WriteCvtPH2PS,    ZnWriteMicrocoded>;
   1363 def : SchedAlias<WriteCvtPH2PSY,   ZnWriteMicrocoded>;
   1364 defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
   1365 // v,m.
   1366 def : SchedAlias<WriteCvtPH2PSLd,  ZnWriteMicrocoded>;
   1367 def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
   1368 defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
   1369 
   1370 //-- SSE4A instructions --//
   1371 // EXTRQ: the regex matches every opcode whose name contains EXTRQ.
   1372 def ZnWriteEXTRQ: SchedWriteRes<[ZnFPU12, ZnFPU2]> {
   1373   let Latency = 2;
   1374 }
   1375 def : InstRW<[ZnWriteEXTRQ], (instregex "EXTRQ")>;
   1376 
   1377 // INSERTQ: the regex matches every opcode whose name contains INSERTQ.
   1378 def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> {
   1379   let Latency = 4;
   1380 }
   1381 def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
   1382 
   1383 //-- SHA instructions --//
   1384 // SHA256MSG2: register and memory forms are microcoded.
   1385 def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
   1386 
   1387 // SHA1MSG1, SHA256MSG1
   1388 // x,x: ZnFPU12 held for 2 cycles.
   1389 def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
   1390   let Latency = 2;
   1391   let ResourceCycles = [2];
   1392 }
   1393 def : InstRW<[ZnWriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
   1394 // x,m: adds the AGU; latency is the register latency plus 7.
   1395 def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
   1396   let Latency = 9;
   1397   let ResourceCycles = [1,2];
   1398 }
   1399 def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
   1400 
   1401 // SHA1MSG2
   1402 // x,x.
   1403 def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ;
   1404 def : InstRW<[ZnWriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
   1405 // x,m.
   1406 def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
   1407   let Latency = 8;
   1408 }
   1409 def : InstRW<[ZnWriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
   1410 
   1411 // SHA1NEXTE
   1412 // x,x: restricted to ZnFPU1.
   1413 def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ;
   1414 def : InstRW<[ZnWriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
   1415 // x,m.
   1416 def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> {
   1417   let Latency = 8;
   1418 }
   1419 def : InstRW<[ZnWriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
   1420 
   1421 // SHA1RNDS4
   1422 // x,x.
   1423 def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> {
   1424   let Latency = 6;
   1425 }
   1426 def : InstRW<[ZnWriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
   1427 // x,m: register latency plus 7.
   1428 def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
   1429   let Latency = 13;
   1430 }
   1431 def : InstRW<[ZnWriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
   1432 
   1433 // SHA256RNDS2
   1434 // x,x.
   1435 def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> {
   1436   let Latency = 4;
   1437 }
   1438 def : InstRW<[ZnWriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
   1439 // x,m: register latency plus 7.
   1440 def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
   1441   let Latency = 11;
   1442 }
   1443 def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
   1444 
   1445 //-- Arithmetic instructions --//
   1446 
   1447 // HADD, HSUB PS/PD: all four WriteFHAdd* classes are aliased to the microcoded write.
   1448 def : SchedAlias<WriteFHAdd,    ZnWriteMicrocoded>;
   1449 def : SchedAlias<WriteFHAddLd,  ZnWriteMicrocoded>;
   1450 def : SchedAlias<WriteFHAddY,   ZnWriteMicrocoded>;
   1451 def : SchedAlias<WriteFHAddYLd, ZnWriteMicrocoded>;
   1452 
   1453 // VDIVPS.
   1454 // TODO - convert to ZnWriteResFpuPair
   1455 // y,y,y: ZnFPU3 is held for the full 12-cycle latency.
   1456 def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
   1457   let Latency = 12;
   1458   let ResourceCycles = [12];
   1459 }
   1460 def : SchedAlias<WriteFDivY,   ZnWriteVDIVPSYr>;
   1461 
   1462 // y,y,m256. NOTE(review): ZnFPU3 is reserved for 19 cycles (the full load+divide latency) versus 12 in the register form - confirm.
   1463 def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   1464   let Latency = 19;
   1465   let NumMicroOps = 2;
   1466   let ResourceCycles = [1, 19];
   1467 }
   1468 def : SchedAlias<WriteFDivYLd,  ZnWriteVDIVPSYLd>;
   1469 
   1470 // VDIVPD.
   1471 // TODO - convert to ZnWriteResFpuPair
   1472 // y,y,y: ZnFPU3 is held for the full 15-cycle latency.
   1473 def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
   1474   let Latency = 15;
   1475   let ResourceCycles = [15];
   1476 }
   1477 def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>;
   1478 
   1479 // y,y,m256. NOTE(review): ZnFPU3 is reserved for 22 cycles versus 15 in the register form - confirm.
   1480 def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   1481   let Latency = 22;
   1482   let NumMicroOps = 2;
   1483   let ResourceCycles = [1,22];
   1484 }
   1485 def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>;
   1486 
   1487 // DPPS: register and load forms, xmm and ymm, are all aliased to the microcoded write.
   1488 // x,x,i / v,v,v,i.
   1489 def : SchedAlias<WriteDPPS,   ZnWriteMicrocoded>;
   1490 def : SchedAlias<WriteDPPSY,  ZnWriteMicrocoded>;
   1491 
   1492 // x,m,i / v,v,m,i.
   1493 def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>;
   1494 def : SchedAlias<WriteDPPSYLd,ZnWriteMicrocoded>;
   1495 
   1496 // DPPD.
   1497 // x,x,i.
   1498 def : SchedAlias<WriteDPPD,   ZnWriteMicrocoded>;
   1499 
   1500 // x,m,i.
   1501 def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
   1502 
   1503 // RSQRTSS
   1504 // TODO - convert to ZnWriteResFpuPair
   1505 // x,x.
   1506 def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
   1507   let Latency = 5;
   1508 }
   1509 def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
   1510 
   1511 // x,m128: register latency plus 7, 2 micro-ops.
   1512 def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
   1513   let Latency = 12;
   1514   let NumMicroOps = 2;
   1515   let ResourceCycles = [1,2]; // FIXME: Is this right?
   1516 }
   1517 def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
   1518 
   1519 // RSQRTPS
   1520 // TODO - convert to ZnWriteResFpuPair
   1521 // y,y: 2 micro-ops, ZnFPU01 held for 2 cycles.
   1522 def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
   1523   let Latency = 5;
   1524   let NumMicroOps = 2;
   1525   let ResourceCycles = [2];
   1526 }
   1527 def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
   1528 
   1529 // y,m256: no ResourceCycles override here, unlike the register form.
   1530 def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
   1531   let Latency = 12;
   1532   let NumMicroOps = 2;
   1533 }
   1534 def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
   1535 
   1536 //-- Other instructions --//
   1537 
   1538 // VZEROUPPER: modeled as microcoded.
   1539 def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>;
   1540 
   1541 // VZEROALL: modeled as microcoded.
   1542 def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
   1543 
   1544 } // SchedModel
   1545