//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver1 to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine description for Znver1.
def Znver1Model : SchedMachineModel {
  // Decode width: Zen decodes 4 instructions per cycle.
  let IssueWidth = 4;
  // Sized after the reorder buffer.
  let MicroOpBufferSize = 192;
  let LoadLatency = 4;
  let MispredictPenalty = 17;
  let HighLatency = 25;
  let PostRAScheduler = 1;

  // FIXME: Not every instruction has scheduling information yet, so the
  // model is explicitly flagged as incomplete.
  let CompleteModel = 0;
}

let SchedModel = Znver1Model in {

// Zen can issue micro-ops to 10 different units in one cycle:
//  * four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
//  * two AGU units (ZnAGU0, ZnAGU1)
//  * four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
// The AGUs feed the load/store queues at two loads and one store per cycle.
// The four integer ALU pipes.
def ZnALU0 : ProcResource<1>;
def ZnALU1 : ProcResource<1>;
def ZnALU2 : ProcResource<1>;
def ZnALU3 : ProcResource<1>;

// The two address-generation pipes.
def ZnAGU0 : ProcResource<1>;
def ZnAGU1 : ProcResource<1>;

// The four floating-point pipes.
def ZnFPU0 : ProcResource<1>;
def ZnFPU1 : ProcResource<1>;
def ZnFPU2 : ProcResource<1>;
def ZnFPU3 : ProcResource<1>;

// FPU pipe groups.
def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;

// Groups describe micro-ops that may be issued to any one of several units.

// ALU pipe groups.
// ZnALU03: pipes 0 and 3.
def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;

// Integer scheduler: 56 entries (14 per ALU pipe).
def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> { let BufferSize=56; }

// AGU scheduler: 28 entries (14 per AGU pipe). AGUs cannot execute general
// ALU operations but are needed by some instructions.
def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> { let BufferSize=28; }

// Integer multiplier; multiplies are issued on ALU1.
def ZnMultiplier : ProcResource<1>;

// Integer divider; divides are issued on ALU2.
def ZnDivider : ProcResource<1>;

// Models the 4-cycle load-to-use latency.
def : ReadAdvance<ReadAfterLd, 4>;

// The Integer PRF for Zen is 168 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
def ZnIntegerPRF : RegisterFile<168, [GR64, CCR]>;

// Floating-point scheduler: 36 entries (9 per FPU pipe).
def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> { let BufferSize=36; }

// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
// registers. Operations on 256-bit data types are cracked into two COPs,
// hence the cost of 2 for VR256.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// The retire unit tracks up to 192 macro ops in flight and commits up to
// 8 macro ops per cycle, in order. It is shared between integer and FP ops.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
// In SMT mode each thread gets 96 entries. The conservative value is not used
// here because there is currently no way to fully model SMT mode, so there is
// no point in trying.
def ZnRCU : RetireControlUnit<192, 8>;

// FIXME: there are 72 read buffers and 44 write buffers.

// A folded load is an instruction that loads and also performs an operation.
// Example: ADDPD xmm,[mem] has two micro-ops:
//      a. load and
//      b. addpd
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops.
//
// Integer-unit pair: defines the register form of SchedRW and its folded-load
// form (SchedRW.Folded).
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - resource cycles per entry of ExePorts ([] keeps the default).
//   UOps     - micro-op count of the register form.
//   LoadLat  - extra latency added for the folded form (default 4).
//   LoadUOps - extra micro-ops added for the folded form (default 1).
multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat, list<int> Res = [], int UOps = 1,
                          int LoadLat = 4, int LoadUOps = 1> {
  // Register form: Lat cycles on ExePorts.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let Latency = Lat;
    let ResourceCycles = Res;
  }

  // Folded-load form: additionally occupies ZnAGU for one cycle and adds
  // LoadLat cycles of latency.
  def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let Latency = !add(Lat, LoadLat);
    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
  }
}

// Floating-point-unit pair: same shape as ZnWriteResPair, but the folded form
// defaults to 7 extra cycles of latency and no extra micro-op.
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts,
                             int Lat, list<int> Res = [], int UOps = 1,
                             int LoadLat = 7, int LoadUOps = 0> {
  // Register form: Lat cycles on ExePorts.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let Latency = Lat;
    let ResourceCycles = Res;
  }

  // Folded-load form: additionally occupies ZnAGU for one cycle and adds
  // LoadLat cycles of latency.
  def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let Latency = !add(Lat, LoadLat);
    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
  }
}

// WriteRMW is attached by codegen to instructions that also write memory.
def : WriteRes<WriteRMW, [ZnAGU]>;

def : WriteRes<WriteStore, [ZnAGU]>;
def : WriteRes<WriteStoreNT, [ZnAGU]>;
def : WriteRes<WriteMove, [ZnALU]>;
def : WriteRes<WriteLoad, [ZnAGU]> {
  let Latency = 8;
}

def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;

defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;

defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;

// Double shifts: only the reg,reg,imm form is modelled here.
defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteSHDrrcl>;
defm : X86WriteResUnsupported<WriteSHDmri>;
defm : X86WriteResUnsupported<WriteSHDmrcl>;

defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;

defm : ZnWriteResPair<WriteCMOV, [ZnALU], 1>;
defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
def : WriteRes<WriteBitTest,[ZnALU]>;

// Bit scans and bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
defm : ZnWriteResPair<WriteBSR, [ZnALU], 3>;
defm : ZnWriteResPair<WriteLZCNT, [ZnALU], 2>;
defm : ZnWriteResPair<WriteTZCNT, [ZnALU], 2>;
defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;

// Treat misc copies as a move.
// Generic COPY pseudo-instructions are scheduled like a register move.
def : InstRW<[WriteMove], (instrs COPY)>;

// BMI1 BEXTR, BMI2 BZHI
defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;

// IDIV
// ZnWriteResPair arguments: write, ports, latency, resource cycles, micro-ops.
// Every divide is issued on ZnALU2 and holds ZnDivider for its whole latency.
defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
defm : ZnWriteResPair<WriteDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
defm : ZnWriteResPair<WriteDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
defm : ZnWriteResPair<WriteIDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;

// IMULH
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
  let Latency = 4;
}

// Floating point operations
// NOTE(review): X86WriteRes is declared outside this file; its arguments
// appear to be (write, ports, latency, resource cycles, micro-ops) - confirm.
defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;

// ZnWriteResFpuPair arguments: write, ports, latency, then optionally
// resource cycles, micro-ops, folded-load latency and folded-load micro-ops.
// The *Z writes are declared unsupported on this model.
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : ZnWriteResFpuPair<WriteCvtSS2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPS2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : ZnWriteResFpuPair<WriteCvtSD2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPD2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : ZnWriteResFpuPair<WriteCvtI2SS, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PS, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
// The 256-bit FP divide writes are intentionally left commented out here.
//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 15>;
//defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
// ZnWriteResFpuPair arguments: write, ports, latency, then optionally
// resource cycles, micro-ops, folded-load latency and folded-load micro-ops.
// The *Z writes are declared unsupported on this model.
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
// The scalar and 256-bit reciprocal-sqrt writes are left commented out here.
//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
// Square roots occupy ZnFPU3 for their whole latency.
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;

// Vector integer operations, which use the FPU units.
// NOTE(review): X86WriteRes is declared outside this file; its arguments
// appear to be (write, ports, latency, resource cycles, micro-ops) - confirm.
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;

defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4, [1], 1, 7, 1>; // FIXME
defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2], 1, 7, 1>; // FIXME
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;

// Variable-count vector shifts.
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

// Vector element insert / extract.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;

def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
  let ResourceCycles = [1, 2];
  let Latency = 2;
}
// Extract straight to memory: also needs an AGU.
def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
  let ResourceCycles = [1, 2, 3];
  let NumMicroOps = 2;
  let Latency = 5;
}

// MOVMSK family.
def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;

// The 256-bit form takes two micro-ops and two cycles on ZnFPU2.
def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> {
  let Latency = 2;
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}

// AES.
defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
defm : ZnWriteResFpuPair<WriteAESIMC, [ZnFPU01], 4>;
defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;

def : WriteRes<WriteFence, [ZnAGU]>;
def : WriteRes<WriteNop, []>;

// Following instructions with latency=100 are microcoded.
// We set long latency so as to block the entire pipeline.
defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>;

// Catch-all write for microcoded instructions: no specific resources, and a
// latency of 100.
def ZnWriteMicrocoded : SchedWriteRes<[]> { let Latency = 100; }

// Generic writes that are treated as microcoded on this model.
def : SchedAlias<WriteMicrocoded, ZnWriteMicrocoded>;
def : SchedAlias<WriteFCMOV, ZnWriteMicrocoded>;
def : SchedAlias<WriteSystem, ZnWriteMicrocoded>;
def : SchedAlias<WriteMPSAD, ZnWriteMicrocoded>;
def : SchedAlias<WriteMPSADY, ZnWriteMicrocoded>;
def : SchedAlias<WriteMPSADLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteMPSADYLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteCLMul, ZnWriteMicrocoded>;
def : SchedAlias<WriteCLMulLd, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpIStrM, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpIStrMLd, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpEStrI, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpEStrILd, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpEStrM, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpEStrMLd, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpIStrI, ZnWriteMicrocoded>;
def : SchedAlias<WritePCmpIStrILd, ZnWriteMicrocoded>;
def : SchedAlias<WriteLDMXCSR, ZnWriteMicrocoded>;
def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;

//=== Regex based InstRW ===//
// Operand notation used in the comments below:
// - r: register.
// - m: memory.
// - i: immediate.
// - mm: 64 bit mmx register.
// - x: 128 bit xmm register.
// - (x)mm: mmx or xmm register.
// - y: 256 bit ymm register.
// - v: any vector register.

//=== Integer Instructions ===//
//-- Move instructions --//
// MOV.
// r16,m.
def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;

// MOVSX, MOVZX.
// r,m.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;

// XCHG.
// r,r.
// Register exchange: two micro-ops, two cycles on the ALU group.
def ZnWriteXCHG : SchedWriteRes<[ZnALU]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
}

def : InstRW<[ZnWriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;

// r,m.
def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;

def : InstRW<[WriteMicrocoded], (instrs XLAT)>;

// POP16.
// r.
def ZnWritePop16r : SchedWriteRes<[ZnAGU]> {
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[ZnWritePop16r], (instregex "POP16rmm")>;
def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;


// PUSH.
// r. Uses the default values.
// m.
def ZnWritePUSH : SchedWriteRes<[ZnAGU]> { let Latency = 4; }
def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;

// PUSHF.
def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;

// PUSHA.
def ZnWritePushA : SchedWriteRes<[ZnAGU]> { let Latency = 8; }
def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;

// LAHF.
def : InstRW<[WriteMicrocoded], (instrs LAHF)>;

// MOVBE.
// r,m.
def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 5; }
def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;

// m16,r16.
def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;

//-- Arithmetic instructions --//

// ADD SUB.
// m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                        "(ADD|SUB)(8|16|32|64)mi8",
                        "(ADD|SUB)64mi32")>;

// ADC SBB.
// m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                        "(ADC|SBB)(16|32|64)mi8",
                        "(ADC|SBB)64mi32")>;

// INC DEC NOT NEG.
// m.
def : InstRW<[WriteALULd],
             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;

// MUL IMUL.
//
// Each operand width has a register write (ZnWriteMulNN) and a load-folding
// write (ZnWriteMulNNLd) that adds the AGU and the load latency.
// The memory-source forms IMULNNrm / IMULNNrmi / IMULNNrmi8 used to be bound
// to the register write, which ignored the load entirely; they are now bound
// to the *Ld write. Those bindings have to follow the *Ld definition because
// TableGen cannot reference a record before it is defined.
// NOTE(review): the rmi/rmi8 forms are presumed to have no register source
// other than the address registers, so no ReadAfterLd is attached to them -
// confirm against the instruction definitions.

// r16.
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  let Latency = 3;
}
def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>;
def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right?

// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  let Latency = 8;
}
def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>;
// r16,m16.
def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16rm)>;
// r16,m16,i.
def : InstRW<[ZnWriteMul16Ld], (instrs IMUL16rmi, IMUL16rmi8)>;

// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  let Latency = 3;
}
def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>;
def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right?

// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  let Latency = 8;
}
def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>;
// r32,m32.
def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32rm)>;
// r32,m32,i.
def : InstRW<[ZnWriteMul32Ld], (instrs IMUL32rmi, IMUL32rmi8)>;

// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>;
def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right?

// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  let Latency = 9;
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>;
// r64,m64.
def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64rm)>;
// r64,m64,i.
def : InstRW<[ZnWriteMul64Ld], (instrs IMUL64rmi32, IMUL64rmi8)>;

// MULX.
// r32,r32,r32.
// MULX r32,r32,r32: issued on ALU1, holds the multiplier for two cycles.
def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  let Latency = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;

// r32,r32,m32.
def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  let Latency = 8;
  let ResourceCycles = [1, 2, 2];
}
def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;

// r64,r64,r64.
// Reserves ZnMultiplier as well as ALU1, matching the memory form below and
// every other multiply write in this model; previously only ALU1 was listed,
// so the multiplier was never modelled as busy for MULX64rr.
def ZnWriteMulX64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  let Latency = 3;
}
def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;

// r64,r64,m64.
def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  let Latency = 8;
}
def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;

//-- Control transfer instructions --//

// J(E|R)CXZ.
def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
def : InstRW<[ZnWriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;

// INTO
def : InstRW<[WriteMicrocoded], (instrs INTO)>;

// LOOP.
def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
def : InstRW<[ZnWriteLOOP], (instrs LOOP)>;

// LOOP(N)E, LOOP(N)Z
def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
def : InstRW<[ZnWriteLOOPE], (instrs LOOPE, LOOPNE)>;

// CALL.
// r.
def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;

// m.
def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;

// RET.
def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
                            "IRET(16|32|64)")>;

//-- Logic instructions --//

// AND OR XOR.
// m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                        "(AND|OR|XOR)(8|16|32|64)mi8",
                        "(AND|OR|XOR)64mi32")>;

// ALU writes with a 2-cycle latency (6 cycles when a load is folded in).
def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> { let Latency = 2; }
def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 6; }

// BT.
// m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;

// BTR BTS BTC.
// r,r,i.
def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> {
  let NumMicroOps = 2;
  let Latency = 2;
}
def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// m,r,i.
def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;

// BLSI BLSMSK BLSR.
// r,r.
def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
// r,m.
def : InstRW<[ZnWriteALULat2Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;

// CLD STD.
def : InstRW<[WriteALU], (instrs STD, CLD)>;

// PDEP PEXT.
// r,r,r.
def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
// r,r,m.
def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;

// RCR RCL.
// m,i.
def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;

// SHR SHL SAR.
// m,i.
def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;

// SHRD SHLD.
// m,r.
def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;

// r,r,cl.
def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;

// m,r,cl.
def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;

//-- Misc instructions --//
// CMPXCHG.
def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {
  let NumMicroOps = 5;
  let Latency = 8;
}
def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;

// CMPXCHG8B.
def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> { let NumMicroOps = 18; }
def : InstRW<[ZnWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

// CMPXCHG16B.
def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;

// LEAVE.
def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>;

// PAUSE.
def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;

// RDTSC.
def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;

// RDPMC.
def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;

// RDRAND.
def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;

// XGETBV.
def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;

//-- String instructions --//
// All string instructions below are treated as microcoded.

// CMPS.
def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;

// LODSB/W.
def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;

// LODSD/Q.
def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;

// MOVS.
def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;

// SCAS.
def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;

// STOS.
def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;

// XADD.
def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;

//=== Floating Point x87 Instructions ===//
//-- Move instructions --//

// Register-to-register x87 load.
def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]> ;

// Register-to-register x87 store.
def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> {
  let NumMicroOps = 2;
  let Latency = 5;
}

// LD_F.
// r.
def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;

// m.
// 80-bit memory load: two micro-ops on AGU + FPU pipe 1 or 3.
def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;

// FBLD.
def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;

// FST(P).
// Operands: r.
def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;

// Operands: m80 -- AGU + FPU pipe 2 or 3, latency 5.
def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 5; }
def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;

// FBSTP.
// Operands: m80.
def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;

// Exchange class: ZnFPU resource, default latency and micro-op count.
def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;

// FXCHG.
def : InstRW<[ZnWriteFXCH], (instrs XCH_F)>;

// FILD.
// Two micro-ops on AGU + FPU pipe 3, latency 11.
def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let Latency = 11;
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>;

// FIST(P) FISTTP.
// AGU + FPU pipe 2 or 3, latency 12.
def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 12; }
def : InstRW<[ZnWriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;

// Shared classes used by the constant loads and stack-pointer ops below.
//   ZnWriteFPU13 - AGU + FPU pipe 1 or 3, latency 8.
//   ZnWriteFPU3  - AGU + FPU pipe 3, latency 11.
def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> { let Latency = 8; }

def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; }

// FLDZ.
def : SchedAlias<WriteFLD0, ZnWriteFPU13>;

// FLD1.
def : SchedAlias<WriteFLD1, ZnWriteFPU3>;

// FLDPI FLDL2E etc.
def : SchedAlias<WriteFLDC, ZnWriteFPU3>;

// FNSTSW.
// Operand: AX.
def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;

// Operand: m16.
def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;

// FLDCW.
def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;

// FNSTCW.
def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;

// FINCSTP FDECSTP.
def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>;

// FFREE.
def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;

// FNSAVE.
def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;

// FRSTOR.
// FRSTOR memory form is treated as microcoded.
def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;

//-- Arithmetic instructions --//

// Single-pipe classes with default latency and micro-op count.
def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]>;

def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]>;

// Load variant of ZnWriteFPU0Lat1: AGU + FPU pipe 0, latency 8.
def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { let Latency = 8; }

// FCHS.
def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;

// FCOM(P) FUCOM(P).
// Operands: r.
def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
// Operands: m.
def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;

// FCOMPP FUCOMPP.
// Operands: r.
def : InstRW<[ZnWriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;

// AGU + FPU pipe 0 or 2, latency 9.
def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]> { let Latency = 9; }

// FCOMI(P) FUCOMI(P).
// NOTE(review): the original comment labels these "m.", but every listed
// opcode carries the "r" suffix -- confirm whether the AGU resource in
// ZnWriteFPU02 is intended for them.
def : InstRW<[ZnWriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;

// Two micro-ops on AGU (1 cycle) + FPU pipe 0 or 3 (3 cycles), latency 12.
def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ResourceCycles = [1,3];
}

// FICOM(P).
def : InstRW<[ZnWriteFPU03], (instregex "FICOM(P?)(16|32)m")>;

// FTST.
def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>;

// FXAM.
def : InstRW<[ZnWriteFPU3Lat1], (instrs FXAM)>;

// FPREM.
def : InstRW<[WriteMicrocoded], (instrs FPREM)>;

// FPREM1.
def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;

// FRNDINT.
def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;

// FSCALE.
def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;

// FXTRACT.
def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;

// FNOP.
def : InstRW<[ZnWriteFPU0Lat1], (instrs FNOP)>;

// WAIT.
def : InstRW<[ZnWriteFPU0Lat1], (instrs WAIT)>;

// FNCLEX.
def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;

// FNINIT.
// FNINIT is treated as microcoded.
def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;

//=== Integer MMX and XMM Instructions ===//

// PACKSSWB/DW.
// Operands: mm <- mm.
// Classes on FPU pipe 1 or 2:
//   ZnWriteFPU12   - default latency and micro-op count.
//   ZnWriteFPU12Y  - two micro-ops.
//   ZnWriteFPU12m  - adds the AGU, defaults otherwise.
//   ZnWriteFPU12Ym - adds the AGU, two micro-ops, latency 8.
def ZnWriteFPU12 : SchedWriteRes<[ZnFPU12]>;
def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> { let NumMicroOps = 2; }
def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]>;
def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def : InstRW<[ZnWriteFPU12],
             (instrs MMX_PACKSSDWirr, MMX_PACKSSWBirr, MMX_PACKUSWBirr)>;
def : InstRW<[ZnWriteFPU12m],
             (instrs MMX_PACKSSDWirm, MMX_PACKSSWBirm, MMX_PACKUSWBirm)>;

// VPMOVSX/ZX BW BD BQ WD WQ DQ.
// Operands: y <- x.
def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;

// Classes on FPU pipe 0, 1 or 3:
//   ZnWriteFPU013    - default latency and micro-op count.
//   ZnWriteFPU013Y   - latency 2.
//   ZnWriteFPU013m   - adds the AGU, two micro-ops, latency 8.
//   ZnWriteFPU013Ld  - adds the AGU, two micro-ops, latency 8.
//   ZnWriteFPU013LdY - adds the AGU, two micro-ops, latency 9.
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]>;
def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> { let Latency = 2; }
def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def ZnWriteFPU013Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let Latency = 9;
  let NumMicroOps = 2;
}

// PBLENDW.
// Operands: x,x,i / v,v,v,i
def : InstRW<[ZnWriteFPU013], (instregex "(V?)PBLENDWrri")>;
// Operands: ymm
def : InstRW<[ZnWriteFPU013Y], (instrs VPBLENDWYrri)>;

// Operands: x,m,i / v,v,m,i
def : InstRW<[ZnWriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
// Operands: y,m,i
def : InstRW<[ZnWriteFPU013LdY], (instrs VPBLENDWYrmi)>;

// Classes on FPU pipe 0 or 1; the Y variant issues two micro-ops.
def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]>;
def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> { let NumMicroOps = 2; }

// VPBLENDD.
// v,v,v,i.
// VPBLENDD register form on FPU pipe 0 or 1.
def : InstRW<[ZnWriteFPU01], (instrs VPBLENDDrri)>;
// Operands: ymm
def : InstRW<[ZnWriteFPU01Y], (instrs VPBLENDDYrri)>;

// Operands: v,v,m,i
// Load forms: two micro-ops, AGU for 1 cycle plus FPU pipe 0 or 1 for
// 2 cycles (xmm, latency 8) or 3 cycles (ymm, latency 9).
def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 3];
}
def : InstRW<[ZnWriteFPU01Op2], (instrs VPBLENDDrmi)>;
def : InstRW<[ZnWriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;

// MASKMOVQ.
def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;

// MASKMOVDQU.
def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;

// VPMASKMOVD.
// Operands: ymm (the regex also matches the non-Y rm opcode).
// NOTE(review): this load pattern covers only the D element size, whereas the
// store pattern below covers D and Q -- confirm whether that is intended.
def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOVD(Y?)rm")>;
// Operands: m, v,v.
def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;

// VPBROADCAST B/W.
// Operands: x, m8/16.
// Two micro-ops: AGU for 1 cycle, FPU pipe 1 or 2 for 2 cycles; latency 8.
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteVPBROADCAST128Ld], (instregex "VPBROADCAST(B|W)rm")>;

// Operands: y, m8/16
// Two micro-ops: AGU for 1 cycle, FPU pipe 1 for 2 cycles; latency 8.
def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteVPBROADCAST256Ld], (instregex "VPBROADCAST(B|W)Yrm")>;

// VPGATHER.
def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;

//-- Arithmetic instructions --//

// HADD, HSUB PS/PD
// PHADD|PHSUB (S) W/D.
// Every WritePHAdd* class (plain, X and Y, with and without load) is aliased
// to ZnWriteMicrocoded.
def : SchedAlias<WritePHAdd, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddLd, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddX, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddY, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;

// PCMPGTQ.
// Register forms (xmm and ymm): FPU pipe 0 or 3, default latency.
def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
def : InstRW<[ZnWritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;

// Operands: x <- x,m -- AGU + FPU pipe 0 or 3, latency 8.
def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> { let Latency = 8; }
// Operands: ymm -- two micro-ops, AGU for 1 cycle and FPU for 2, latency 8.
def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1,2];
}
def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;

//-- Logic instructions --//

// PSLL,PSRL,PSRA W/D/Q.
// Operands: x,x / v,v,x.
// Shift classes on FPU pipe 2; the Y variant has latency 2.
def ZnWritePShift : SchedWriteRes<[ZnFPU2]>;
def ZnWritePShiftY : SchedWriteRes<[ZnFPU2]> { let Latency = 2; }

// PSLL,PSRL DQ.
def : InstRW<[ZnWritePShift], (instregex "(V?)PS(R|L)LDQri")>;
def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;

//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//

// VPERM2F128.
def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;

// Two micro-ops on AGU + FPU pipe 1 or 3, latency 8.
def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
// VBROADCASTF128.
def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128)>;

// EXTRACTPS.
// r32,x,i.
// EXTRACTPS register form: two micro-ops, FPU pipe 1 or 2 for 1 cycle and
// FPU pipe 2 for 2 cycles, latency 2.
def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;

// EXTRACTPS memory form: two micro-ops, latency 5.
// NOTE(review): the AGU entry is held for 5 cycles here, unlike the 1 cycle
// used by the other load/store classes in this section -- confirm.
def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [5, 1, 2];
}
// Operands: m32,x,i.
def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;

// VEXTRACTF128.
// Operands: x,y,i.
def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr)>;

// Operands: m128,y,i.
def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr)>;

// 128-bit insert classes on FPU pipe 0, 1 or 3.
//   r  - FPU busy for 2 cycles, latency 2.
//   Ld - two micro-ops, AGU for 1 cycle and FPU for 2, latency 9.
def ZnWriteVINSERT128r : SchedWriteRes<[ZnFPU013]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def ZnWriteVINSERT128Ld : SchedWriteRes<[ZnAGU,ZnFPU013]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
// VINSERTF128.
// Operands: y,y,x,i.
def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr)>;
def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm)>;

// VGATHER.
def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;

//-- Conversion instructions --//
// PD -> PS register classes on FPU pipe 3: latency 4 (xmm) and 5 (ymm).
def ZnWriteCVTPD2PSr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; }
def ZnWriteCVTPD2PSYr : SchedWriteRes<[ZnFPU3]> { let Latency = 5; }

// CVTPD2PS.
// Operands: x,x.
def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
// Operands: y,y.
def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
// Operands: z,z.
defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;

// Load form: two micro-ops, AGU for 1 cycle and FPU for 2, latency 11.
// NOTE(review): this class uses the ZnFPU03 group while the register forms
// above use ZnFPU3 -- confirm which resource is intended.
def ZnWriteCVTPD2PSLd : SchedWriteRes<[ZnAGU,ZnFPU03]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ResourceCycles = [1,2];
}
// Operands: x,m128.
def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;

// x,m256.
// CVTPD2PS ymm load form: AGU + FPU pipe 3, latency 11.
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; }
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// Operands: z,m512
defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;

// CVTSD2SS.
// Operands: x,x.
// Reuses ZnWriteCVTPD2PSr.
def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;

// Operands: x,m64.
def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;

// CVTPS2PD.
// Operands: x,x -- FPU pipe 3, latency 3.
def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> { let Latency = 3; }
def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;

// Operands: x,m64.
// Operands: y,m128.
// Two micro-ops on AGU + FPU pipe 3, latency 10; shared by both load forms.
def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let Latency = 10;
  let NumMicroOps = 2;
}
def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;

// Operands: y,x -- FPU pipe 3, latency 3.
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> { let Latency = 3; }
def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;

// CVTSS2SD.
// Operands: x,x -- FPU pipe 3, latency 4.
def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; }
def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;

// Operands: x,m32 -- two micro-ops, AGU for 1 cycle and FPU pipe 3 for 2,
// latency 11.
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;

// DQ -> PD register class: FPU pipe 1 or 2 plus FPU pipe 3, latency 5.
def ZnWriteCVTDQ2PDr : SchedWriteRes<[ZnFPU12,ZnFPU3]> { let Latency = 5; }
// CVTDQ2PD.
// Operands: x,x.
def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;

// Same as xmm
// Operands: y,x.
def : InstRW<[ZnWriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;

// PD -> DQ register class: FPU pipe 1 or 2 plus FPU pipe 3, latency 5.
def ZnWriteCVTPD2DQr : SchedWriteRes<[ZnFPU12, ZnFPU3]> { let Latency = 5; }
// CVT(T)PD2DQ.
// x,x.
// CVT(T)PD2DQ xmm register form.
// Uses ZnWriteCVTPD2DQr, the class defined for this conversion and also used
// by the ymm register form below. (This previously named the DQ2PD class
// ZnWriteCVTDQ2PDr, which has identical resources and latency, so the
// modeled numbers are unchanged.)
def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;

// Load form: two micro-ops on AGU + FPU pipe 1 or 2 + FPU pipe 3, latency 12.
def ZnWriteCVTPD2DQLd : SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> {
  let Latency = 12;
  let NumMicroOps = 2;
}
// Operands: x,m128.
def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
// Same as xmm handling.
// Operands: x,y.
def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
// Operands: x,m256.
def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;

// FPU pipe 3, latency 4.
def ZnWriteCVTPS2PIr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; }
// CVT(T)PS2PI.
// Operands: mm,x.
def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;

// CVTPI2PD.
// Operands: x,mm.
def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;

// CVT(T)PD2PI.
// Operands: mm,x.
def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;

// FPU pipe 3, latency 5. Not referenced within this part of the file.
def ZnWriteCVSTSI2SSr : SchedWriteRes<[ZnFPU3]> { let Latency = 5; }

// CVT(T)SS2SI.
// Operands: r32,x -- same class as CVTPD2DQ register form.
def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
// Operands: r32,m32 -- same class as CVTPD2DQ load form.
def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;

// FPU pipe 0, 1 or 3 plus FPU pipe 3, latency 5.
def ZnWriteCVSTSI2SDr : SchedWriteRes<[ZnFPU013, ZnFPU3]> { let Latency = 5; }
// CVTSI2SD.
// Operands: x,r32/64.
def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;


// SD -> SI classes on FPU pipe 3 + FPU pipe 2: latency 5 for the register
// form, latency 12 with the AGU added for the load form.
def ZnWriteCVSTSI2SIr : SchedWriteRes<[ZnFPU3, ZnFPU2]> { let Latency = 5; }
def ZnWriteCVSTSI2SILd : SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> {
  let Latency = 12;
}
// CVTSD2SI.
// Operands: r32/64
def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
// Operands: r32,m32.
def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;

// VCVTPS2PH.
// x,v,i.
// VCVTPS2PH register forms are microcoded; the Z (512-bit) class is marked
// unsupported.
def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
// Operands: m,v,i.
def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

// VCVTPH2PS.
// Operands: v,x.
def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
// Operands: v,m.
def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;

//-- SSE4A instructions --//
// EXTRQ
// FPU pipe 1 or 2 plus FPU pipe 2, latency 2.
def ZnWriteEXTRQ : SchedWriteRes<[ZnFPU12, ZnFPU2]> { let Latency = 2; }
def : InstRW<[ZnWriteEXTRQ], (instregex "EXTRQ")>;

// INSERTQ
// FPU pipe 0 or 3 plus FPU pipe 1, latency 4.
def ZnWriteINSERTQ : SchedWriteRes<[ZnFPU03,ZnFPU1]> { let Latency = 4; }
def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;

//-- SHA instructions --//
// SHA256MSG2
def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;

// SHA1MSG1, SHA256MSG1
// Operands: x,x -- FPU pipe 1 or 2 busy for 2 cycles, latency 2.
def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : InstRW<[ZnWriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
// Operands: x,m -- AGU for 1 cycle, FPU pipe 1 or 2 for 2 cycles, latency 9.
def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let Latency = 9;
  let ResourceCycles = [1,2];
}
def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;

// SHA1MSG2
// Operands: x,x -- FPU pipe 1 or 2, default latency.
def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]>;
def : InstRW<[ZnWriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
// x,m.
// SHA1MSG2 load form: AGU + FPU pipe 1 or 2, latency 8.
def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { let Latency = 8; }
def : InstRW<[ZnWriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;

// SHA1NEXTE
// Operands: x,x -- FPU pipe 1, default latency.
def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]>;
def : InstRW<[ZnWriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
// Operands: x,m -- AGU + FPU pipe 1, latency 8.
def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> { let Latency = 8; }
def : InstRW<[ZnWriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;

// SHA1RNDS4
// Operands: x,x -- FPU pipe 1, latency 6.
def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> { let Latency = 6; }
def : InstRW<[ZnWriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
// Operands: x,m -- AGU + FPU pipe 1, latency 13.
def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> { let Latency = 13; }
def : InstRW<[ZnWriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;

// SHA256RNDS2
// Operands: x,x -- FPU pipe 1, latency 4.
def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> { let Latency = 4; }
def : InstRW<[ZnWriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
// Operands: x,m -- AGU + FPU pipe 1, latency 11.
def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> { let Latency = 11; }
def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;

//-- Arithmetic instructions --//

// HADD, HSUB PS/PD
// All WriteFHAdd* classes are aliased to ZnWriteMicrocoded.
def : SchedAlias<WriteFHAdd, ZnWriteMicrocoded>;
def : SchedAlias<WriteFHAddLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteFHAddY, ZnWriteMicrocoded>;
def : SchedAlias<WriteFHAddYLd, ZnWriteMicrocoded>;

// VDIVPS.
// TODO - convert to ZnWriteResFpuPair
// Operands: y,y,y -- FPU pipe 3 busy for 12 cycles, latency 12.
def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
  let Latency = 12;
  let ResourceCycles = [12];
}
def : SchedAlias<WriteFDivY, ZnWriteVDIVPSYr>;

// y,y,m256.
// VDIVPS ymm load form: two micro-ops, AGU for 1 cycle and FPU pipe 3 for
// 19 cycles, latency 19.
// NOTE(review): the FPU3 occupancy here equals the full load latency (19),
// while the register form holds FPU3 for 12 cycles -- confirm.
def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let Latency = 19;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 19];
}
def : SchedAlias<WriteFDivYLd, ZnWriteVDIVPSYLd>;

// VDIVPD.
// TODO - convert to ZnWriteResFpuPair
// Operands: y,y,y -- FPU pipe 3 busy for 15 cycles, latency 15.
def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
  let Latency = 15;
  let ResourceCycles = [15];
}
def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>;

// Operands: y,y,m256 -- two micro-ops, AGU for 1 cycle and FPU pipe 3 for
// 22 cycles, latency 22.
def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let Latency = 22;
  let NumMicroOps = 2;
  let ResourceCycles = [1,22];
}
def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>;

// DPPS.
// Operands: x,x,i / v,v,v,i.
def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;
def : SchedAlias<WriteDPPSY, ZnWriteMicrocoded>;

// Operands: x,m,i / v,v,m,i.
def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteDPPSYLd, ZnWriteMicrocoded>;

// DPPD.
// Operands: x,x,i.
def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;

// Operands: x,m,i.
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;

// RSQRTSS
// TODO - convert to ZnWriteResFpuPair
// Operands: x,x -- FPU pipe 0 or 2, latency 5.
def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> { let Latency = 5; }
def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;

// Operands: x,m128 -- two micro-ops on AGU + FPU pipe 0 or 2, latency 12.
def ZnWriteRSQRTSSLd : SchedWriteRes<[ZnAGU, ZnFPU02]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ResourceCycles = [1,2]; // FIXME: Is this right?
}
def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;

// RSQRTPS
// TODO - convert to ZnWriteResFpuPair
// Operands: y,y -- two micro-ops, FPU pipe 0 or 1 busy for 2 cycles,
// latency 5.
def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;

// y,m256.
// RSQRTPS ymm load form: two micro-ops on AGU + FPU pipe 0 or 1, latency 12.
def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 12;
  let NumMicroOps = 2;
}
def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;

//-- Other instructions --//

// VZEROUPPER.
def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>;

// VZEROALL.
def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;

} // SchedModel