//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Haswell to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

def HaswellModel : SchedMachineModel {
  // Every x86 instruction is modeled as one micro-op, and the hardware can
  // decode four instructions per cycle.
  let IssueWidth = 4;
  let MicroOpBufferSize = 192; // Sized after the reorder buffer.
  let LoadLatency = 4;
  let MispredictPenalty = 16;

  // Derived from the LSD (loop-stream detector) queue size and benchmarking
  // data.
  let LoopMicroOpBufferSize = 50;

  // FIXME: SSE4 and AVX are unimplemented. Marking the model incomplete lets
  // the scheduler assign a default model to unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = HaswellModel in {

// Haswell can issue micro-ops to 8 different ports in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 receives the data half of stores. Store data may become available
// later than the store address, but store latency is not modeled, so that
// difference is ignored.
// Ports 2 and 3 are identical: they handle loads and the address half of
// stores. Port 7 can handle address calculations.
def HWPort0 : ProcResource<1>;
def HWPort1 : ProcResource<1>;
def HWPort2 : ProcResource<1>;
def HWPort3 : ProcResource<1>;
def HWPort4 : ProcResource<1>;
def HWPort5 : ProcResource<1>;
def HWPort6 : ProcResource<1>;
def HWPort7 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
def HWPort01   : ProcResGroup<[HWPort0, HWPort1]>;
def HWPort23   : ProcResGroup<[HWPort2, HWPort3]>;
def HWPort237  : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
def HWPort04   : ProcResGroup<[HWPort0, HWPort4]>;
def HWPort05   : ProcResGroup<[HWPort0, HWPort5]>;
def HWPort06   : ProcResGroup<[HWPort0, HWPort6]>;
def HWPort15   : ProcResGroup<[HWPort1, HWPort5]>;
def HWPort16   : ProcResGroup<[HWPort1, HWPort6]>;
def HWPort56   : ProcResGroup<[HWPort5, HWPort6]>;
def HWPort015  : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
def HWPort056  : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
def HWPort0156 : ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;

// 60 Entry Unified Scheduler
def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3,
                              HWPort4, HWPort5, HWPort6, HWPort7]> {
  let BufferSize = 60;
}

// Integer division issued on port 0.
def HWDivider : ProcResource<1>;

// Loads take 4 cycles, so ReadAfterLd registers needn't be available until 4
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

// Many SchedWrites come in pairs: one without and one with a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass emits the resource usage for both variants at once.
multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: one cycle on ExePort with latency Lat.
  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }

  // Memory form: additionally uses a cycle on port 2/3 and is 4 cycles
  // slower.
  def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
    let Latency = !add(Lat, 4);
  }
}

// A folded store needs a cycle on port 4 for the store data, but it does not
// need an extra port 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [HWPort4]>;

// Stores: address generation on ports 2/3/7, data on port 4.
def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
def : WriteRes<WriteLoad,  [HWPort23]> { let Latency = 4; }
def : WriteRes<WriteMove,  [HWPort0156]>;
def : WriteRes<WriteZero,  []>;

defm : HWWriteResPair<WriteALU,   HWPort0156, 1>;
defm : HWWriteResPair<WriteIMul,  HWPort1,    3>;
def  : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : HWWriteResPair<WriteShift, HWPort06,   1>;
defm : HWWriteResPair<WriteJump,  HWPort06,   1>;

// This covers simple LEAs with one or two input operands.
// Complex LEAs can only execute on port 1 and need two cycles on that port to
// read all inputs. That is not modeled.
def : WriteRes<WriteLEA, [HWPort15]>;

// A rough approximation: the real latency depends on the dividend.
def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
  let Latency = 25;
  let ResourceCycles = [1, 10];
}
def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
  let Latency = 29;
  let ResourceCycles = [1, 1, 10];
}

// Scalar and vector floating point.
defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp,        HWPort0,   5>;
defm : HWWriteResPair<WriteFRsqrt,      HWPort0,   5>;
defm : HWWriteResPair<WriteFSqrt,       HWPort0,   15>;
defm : HWWriteResPair<WriteCvtF2I,      HWPort1,   3>;
defm : HWWriteResPair<WriteCvtI2F,      HWPort1,   4>;
defm : HWWriteResPair<WriteCvtF2F,      HWPort1,   3>;
defm : HWWriteResPair<WriteFShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteFBlend,      HWPort015, 1>;
defm : HWWriteResPair<WriteFShuffle256, HWPort5,   3>;

// Variable FP blends occupy port 5 for two cycles, so they are spelled out
// instead of going through HWWriteResPair.
def : WriteRes<WriteFVarBlend, [HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

// Vector integer operations.
defm : HWWriteResPair<WriteVecShift,   HWPort0,   1>;
defm : HWWriteResPair<WriteVecLogic,   HWPort015, 1>;
defm : HWWriteResPair<WriteVecALU,     HWPort15,  1>;
defm : HWWriteResPair<WriteVecIMul,    HWPort0,   5>;
defm : HWWriteResPair<WriteShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteBlend,      HWPort15,  1>;
defm : HWWriteResPair<WriteShuffle256, HWPort5,   3>;

def : WriteRes<WriteVarBlend, [HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2, 1];
}
def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1, 1];
}

def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 2];
}
def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 2];
}

// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [HWPort0]> {
  let Latency = 10;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
  let Latency = 10;
  let ResourceCycles = [3, 2, 4];
}
def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [6, 2, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [HWPort0]> {
  let Latency = 11;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
  let Latency = 11;
  let ResourceCycles = [6, 2];
}
def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 2, 2, 1];
}

// AES Instructions.
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
  let Latency = 7;
  let ResourceCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
  let Latency = 7;
  let ResourceCycles = [1, 1];
}

def : WriteRes<WriteAESIMC, [HWPort5]> {
  let Latency = 14;
  let ResourceCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
  let Latency = 14;
  let ResourceCycles = [2, 1];
}

def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
  let Latency = 10;
  let ResourceCycles = [2, 8];
}
def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [2, 7, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 7;
  let ResourceCycles = [2, 1, 1];
}

// System and microcoded instructions get a flat latency of 100.
def : WriteRes<WriteSystem,     [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteFence,      [HWPort23, HWPort4]>;
def : WriteRes<WriteNop,        []>;

//================ Exceptions ================//

//-- Specific Scheduling Models --//

// Starting with P0.
// Naming scheme used by the helpers below, as it appears from the defs
// themselves: "Write<n>P<ports>" lists the ports (or port group) used, with an
// optional leading cycle count; "_Lat<k>" records the latency; an "Ld" suffix
// adds a port 2/3 load.
def WriteP0 : SchedWriteRes<[HWPort0]>;

def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

def WriteP01 : SchedWriteRes<[HWPort01]>;

def Write2P01 : SchedWriteRes<[HWPort01]> { let NumMicroOps = 2; }
def Write3P01 : SchedWriteRes<[HWPort01]> { let NumMicroOps = 3; }

def WriteP015 : SchedWriteRes<[HWPort015]>;

def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> { let NumMicroOps = 2; }
def WriteP06 : SchedWriteRes<[HWPort06]>;

def Write2P06 : SchedWriteRes<[HWPort06]> {
  let Latency = 1;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}

def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [3];
}

def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
  let NumMicroOps = 2;
}

def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}

def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

def Write5P0156 : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 5;
  let ResourceCycles = [5];
}

def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [1, 2, 1];
}

def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [2, 2, 1];
}

def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [3, 2, 1];
}

// Starting with P1.
def WriteP1 : SchedWriteRes<[HWPort1]>;

def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { let NumMicroOps = 2; }
def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> { let Latency = 3; }
def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> { let Latency = 7; }

def Write2P1 : SchedWriteRes<[HWPort1]> {
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def WriteP15 : SchedWriteRes<[HWPort15]>;
def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> { let Latency = 4; }

def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

// Starting with P2.
def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [2, 1];
}

// Starting with P5.
def WriteP5 : SchedWriteRes<[HWPort5]>;
def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

// Notation:
// - r: register.
// - mm: 64 bit mmx register.
// - x = 128 bit xmm register.
// - (x)mm = mmx or xmm register.
// - y = 256 bit ymm register.
// - v = any vector register.
// - m = memory.

//=== Integer Instructions ===//
//-- Move instructions --//

// MOV.
// r16,m.
def : InstRW<[WriteALULd], (instregex "MOV16rm")>;

// MOVSX, MOVZX.
// r,m.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;

// CMOVcc.
// r,r.
def : InstRW<[Write2P0156_Lat2],
      (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
// r,m.
def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
      (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;

// XCHG.
// r,r.
def WriteXCHG : SchedWriteRes<[HWPort0156]> {
  let Latency = 2;
  let ResourceCycles = [3];
}

def : InstRW<[WriteXCHG],
      (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;

// r,m.
// No port usage is modeled; only latency and micro-op count are recorded.
def WriteXCHGrm : SchedWriteRes<[]> {
  let Latency = 21;
  let NumMicroOps = 8;
}
def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;

// XLAT.
def WriteXLAT : SchedWriteRes<[]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteXLAT], (instregex "XLAT")>;

// PUSH.
// m.
def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;

// PUSHF.
def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
  let NumMicroOps = 4;
}
def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;

// PUSHA.
def WritePushA : SchedWriteRes<[]> { let NumMicroOps = 19; }
def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;

// POP.
// m.
def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;

// POPF.
def WritePopF : SchedWriteRes<[]> { let NumMicroOps = 9; }
def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;

// POPA.
def WritePopA : SchedWriteRes<[]> { let NumMicroOps = 18; }
def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;

// LAHF SAHF.
def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;

// BSWAP.
// r32.
def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;

// r64.
def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;

// MOVBE.
// r16,m16 / r64,m64.
def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;

// r32, m32.
def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;

// m16,r16.
def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;

// m32,r32.
def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;

// m64,r64.
def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
  let NumMicroOps = 4;
}
def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;

//-- Arithmetic instructions --//

// ADD SUB.
// m,r/i.
def : InstRW<[Write2P0156_2P237_P4],
      (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                 "(ADD|SUB)(8|16|32|64)mi8",
                 "(ADD|SUB)64mi32")>;

// ADC SBB.
// r,r/i.
def : InstRW<[Write2P0156_Lat2],
      (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
                 "(ADC|SBB)(16|32|64)ri8",
                 "(ADC|SBB)64ri32",
                 "(ADC|SBB)(8|16|32|64)rr_REV")>;

// r,m.
def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
      (instregex "(ADC|SBB)(8|16|32|64)rm")>;

// m,r/i.
def : InstRW<[Write3P0156_2P237_P4],
      (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                 "(ADC|SBB)(16|32|64)mi8",
                 "(ADC|SBB)64mi32")>;

// INC DEC NOT NEG.
// m.
def : InstRW<[WriteP0156_2P237_P4],
      (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
                 "(INC|DEC)64(16|32)m")>;

// MUL IMUL.
// r16.
def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 4;
}
def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;

// m16.
def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 5;
}
def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;

// r32.
def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;

// m32.
def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 4;
}
def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;

// r64.
def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;

// m64.
def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;

// r16,r16.
def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;

// r16,m16.
def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;

// MULX.
// r32,r32,r32.
def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
  let Latency = 4;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;

// r32,r32,m32.
def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;

// r64,r64,r64.
def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;

// r64,r64,m64.
def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;

// DIV.
// r8.
def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 22;
  let NumMicroOps = 9;
}
def : InstRW<[WriteDiv8], (instregex "DIV8r")>;

// r16.
def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 23;
  let NumMicroOps = 10;
}
def : InstRW<[WriteDiv16], (instregex "DIV16r")>;

// r32.
def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 22;
  let NumMicroOps = 10;
}
def : InstRW<[WriteDiv32], (instregex "DIV32r")>;

// r64.
def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 32;
  let NumMicroOps = 36;
}
def : InstRW<[WriteDiv64], (instregex "DIV64r")>;

// IDIV.
// r8.
def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 23;
  let NumMicroOps = 9;
}
def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;

// r16.
def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 23;
  let NumMicroOps = 10;
}
def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;

// r32.
def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 22;
  let NumMicroOps = 9;
}
def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;

// r64.
def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 39;
  let NumMicroOps = 59;
}
def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;

//-- Logic instructions --//

// AND OR XOR.
// m,r/i.
def : InstRW<[Write2P0156_2P237_P4],
      (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                 "(AND|OR|XOR)(8|16|32|64)mi8",
                 "(AND|OR|XOR)64mi32")>;

// SHR SHL SAR.
// m,i.
def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;

// r,cl.
def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;

// m,cl.
def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
  let NumMicroOps = 6;
  let ResourceCycles = [3, 2, 1];
}
def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;

// ROR ROL.
// r,1.
def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;

// m,i.
def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 5;
  let ResourceCycles = [2, 2, 1];
}
def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;

// r,cl.
def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;

// m,cl.
// Defs below that take an empty resource list model no port usage; they only
// record a micro-op count (and, where given, a latency).
def WriteRotateRMWCL : SchedWriteRes<[]> { let NumMicroOps = 6; }
def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;

// RCR RCL.
// r,1.
def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;

// m,1.
def WriteRCm1 : SchedWriteRes<[]> { let NumMicroOps = 6; }
def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;

// r,i.
def WriteRCri : SchedWriteRes<[HWPort0156]> {
  let Latency = 6;
  let NumMicroOps = 8;
}
def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;

// m,i.
def WriteRCmi : SchedWriteRes<[]> { let NumMicroOps = 11; }
def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;

// SHRD SHLD.
// r,r,i.
def WriteShDrr : SchedWriteRes<[HWPort1]> { let Latency = 3; }
def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;

// m,r,i.
def WriteShDmr : SchedWriteRes<[]> { let NumMicroOps = 5; }
def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;

// SHLD r,r,cl.
def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
  let Latency = 3;
  let NumMicroOps = 4;
}
def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;

// SHRD r,r,cl.
def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 4;
}
def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;

// m,r,cl.
def WriteShDmrCL : SchedWriteRes<[]> { let NumMicroOps = 7; }
def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;

// BT.
// r,r/i.
def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;

// m,r.
// No port usage is modeled for the memory/register bit tests; only the
// micro-op count is recorded.
def WriteBTmr : SchedWriteRes<[]> { let NumMicroOps = 10; }
def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;

// m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;

// BTR BTS BTC.
// r,r/i.
def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// m,r.
def WriteBTRSCmr : SchedWriteRes<[]> { let NumMicroOps = 11; }
def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;

// m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;

// BSF BSR.
// r,r.
def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
// r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;

// SETcc.
// r.
def : InstRW<[WriteShift],
      (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
// m.
def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteSetCCm],
      (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;

// CLD STD.
def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;

// LZCNT TZCNT.
// The alternation must cover only the "L"/"TZ" prefix so that the common
// "CNT" suffix applies to both; "(L|TZCNT)" would match "L16rr" and never
// LZCNT.
// r,r.
def : InstRW<[WriteP1_Lat3], (instregex "(L|TZ)CNT(16|32|64)rr")>;
// r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|TZ)CNT(16|32|64)rm")>;

// ANDN.
// r,r.
def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
// r,m.
def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;

// BLSI BLSMSK BLSR.
// r,r.
def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
// r,m.
def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;

// BEXTR.
// r,r,r.
def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
// r,m,r.
def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;

// BZHI.
// r,r,r.
def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
// r,m,r.
def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;

// PDEP PEXT.
// r,r,r.
def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
// r,m,r.
def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;

//-- Control transfer instructions --//

// J(E|R)CXZ.
def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;

// LOOP.
// No port usage is modeled; only the micro-op count is recorded.
def WriteLOOP : SchedWriteRes<[]> { let NumMicroOps = 7; }
def : InstRW<[WriteLOOP], (instregex "LOOP")>;

// LOOP(N)E
def WriteLOOPE : SchedWriteRes<[]> { let NumMicroOps = 11; }
def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>;

// CALL.
// r.
def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>;

// m.
def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>;

// RET.
def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>;

// i.
def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;

// BOUND.
// r,m.
def WriteBOUND : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>;

// INTO.
// No port usage is modeled; only the micro-op count is recorded.
def WriteINTO : SchedWriteRes<[]> { let NumMicroOps = 4; }
def : InstRW<[WriteINTO], (instregex "INTO")>;

//-- String instructions --//

// LODSB/W.
def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>;

// LODSD/Q.
def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>;

// STOS.
def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>;

// MOVS.
def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 5;
  let ResourceCycles = [2, 1, 2];
}
def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>;

// SCAS.
def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>;

// CMPS.
def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 5;
  let ResourceCycles = [2, 3];
}
def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>;

//-- Synchronization instructions --//

// XADD.
def WriteXADD : SchedWriteRes<[]> { let NumMicroOps = 5; }
def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>;

// CMPXCHG.
def WriteCMPXCHG : SchedWriteRes<[]> { let NumMicroOps = 6; }
def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;

// CMPXCHG8B.
def WriteCMPXCHG8B : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>;

// CMPXCHG16B.
def WriteCMPXCHG16B : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>;

//-- Other --//

// PAUSE.
def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> {
  let NumMicroOps = 5;
  let ResourceCycles = [1, 3];
}
def : InstRW<[WritePAUSE], (instregex "PAUSE")>;

// LEAVE.
def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>;

// XGETBV.
// No port usage is modeled for the next three; only micro-op counts.
def WriteXGETBV : SchedWriteRes<[]> { let NumMicroOps = 8; }
def : InstRW<[WriteXGETBV], (instregex "XGETBV")>;

// RDTSC.
def WriteRDTSC : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteRDTSC], (instregex "RDTSC")>;

// RDPMC.
def WriteRDPMC : SchedWriteRes<[]> { let NumMicroOps = 34; }
def : InstRW<[WriteRDPMC], (instregex "RDPMC")>;

// RDRAND.
def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
  let NumMicroOps = 17;
  let ResourceCycles = [1, 16];
}
def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>;

//=== Floating Point x87 Instructions ===//
//-- Move instructions --//

// FLD.
// r.
def : InstRW<[WriteP01], (instregex "LD_Frr")>;

// m80.
def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 4;
  let ResourceCycles = [2, 2];
}
def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>;

// FBLD.
// m80.
def WriteFBLD : SchedWriteRes<[]> {
  let Latency = 47;
  let NumMicroOps = 43;
}
def : InstRW<[WriteFBLD], (instregex "FBLDm")>;

// FST(P).
// r.
def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>;

// m80.
def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> {
  let NumMicroOps = 7;
  let ResourceCycles = [3, 2, 2];
}
def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>;

// FBSTP.
// m80.
def WriteFBSTP : SchedWriteRes<[]> { let NumMicroOps = 226; }
def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>;

// FXCHG.
def : InstRW<[WriteNop], (instregex "XCH_F")>;

// FILD.
def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>;

// FIST(P) FISTTP.
def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>;

// FLDZ.
def : InstRW<[WriteP01], (instregex "LD_F0")>;

// FLD1.
def : InstRW<[Write2P01], (instregex "LD_F1")>;

// FLDPI FLDL2E etc.
// Each pattern must be its own comma-separated operand: TableGen concatenates
// adjacent string literals, so a missing comma silently fuses two patterns
// into a single regex that matches no instruction.
def : InstRW<[Write2P01], (instregex "FLDPI",
                                     "FLDL2(T|E)",
                                     "FLDL(G|N)2")>;

// FCMOVcc.
// The alternation lists the B, BE, E and P conditions and their negations so
// that every FCMOVcc form is covered.
def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F")>;

// FNSTSW.
// AX.
def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>;

// m16.
def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> {
  let Latency = 6;
  let NumMicroOps = 3;
}
def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>;

// FLDCW.
def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>;

// FNSTCW.
def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>;

// FINCSTP FDECSTP.
def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>;

// FFREE.
def : InstRW<[WriteP01], (instregex "FFREE")>;

// FNSAVE.
def WriteFNSAVE : SchedWriteRes<[]> { let NumMicroOps = 147; }
def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>;

// FRSTOR.
def WriteFRSTOR : SchedWriteRes<[]> { let NumMicroOps = 90; }
def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>;

//-- Arithmetic instructions --//

// FABS.
def : InstRW<[WriteP0], (instregex "ABS_F")>;

// FCHS.
def : InstRW<[WriteP0], (instregex "CHS_F")>;

// FCOM(P) FUCOM(P).
// r.
def : InstRW<[WriteP1], (instregex "COM_FST0r",
                                   "COMP_FST0r",
                                   "UCOM_Fr",
                                   "UCOM_FPr")>;
// m.
def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m",
                                       "FCOMP(32|64)m")>;

// FCOMPP FUCOMPP.
// r.
def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>;

// FCOMI(P) FUCOMI(P).
// r.
def : InstRW<[Write3P01], (instregex "COM_FIr",
                                     "COM_FIPr",
                                     "UCOM_FIr",
                                     "UCOM_FIPr")>;

// FICOM(P).
def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m",
                                        "FICOMP(16|32)m")>;

// FTST.
def : InstRW<[WriteP1], (instregex "TST_F")>;

// FXAM.
def : InstRW<[Write2P1], (instregex "FXAM")>;

// FPREM.
def WriteFPREM : SchedWriteRes<[]> {
  let NumMicroOps = 28;
  let Latency = 19;
}
def : InstRW<[WriteFPREM], (instregex "FPREM")>;

// FPREM1.
def WriteFPREM1 : SchedWriteRes<[]> {
  let NumMicroOps = 41;
  let Latency = 27;
}
def : InstRW<[WriteFPREM1], (instregex "FPREM1")>;

// FRNDINT.
def WriteFRNDINT : SchedWriteRes<[]> {
  let NumMicroOps = 17;
  let Latency = 11;
}
def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>;

//-- Math instructions --//

// FSCALE.
def WriteFSCALE : SchedWriteRes<[]> {
  let NumMicroOps = 50; // 25-75
  let Latency = 75;     // 49-125
}
def : InstRW<[WriteFSCALE], (instregex "FSCALE")>;

// FXTRACT.
def WriteFXTRACT : SchedWriteRes<[]> {
  let NumMicroOps = 17;
  let Latency = 15;
}
def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>;

//-- Other instructions --//

// FNOP.
def : InstRW<[WriteP01], (instregex "FNOP")>;

// WAIT.
def : InstRW<[Write2P01], (instregex "WAIT")>;

// FNCLEX.
def : InstRW<[Write5P0156], (instregex "FNCLEX")>;

// FNINIT.
def WriteFNINIT : SchedWriteRes<[]> { let NumMicroOps = 26; }
def : InstRW<[WriteFNINIT], (instregex "FNINIT")>;

//=== Integer MMX and XMM Instructions ===//
//-- Move instructions --//

// MOVD.
// r32/64 <- (x)mm.
def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr",
                                   "MMX_MOVD64from64rr",
                                   "VMOVPDI2DIrr",
                                   "MOVPDI2DIrr")>;

// (x)mm <- r32/64.
def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr",
                                   "MMX_MOVD64to64rr",
                                   "VMOVDI2PDIrr",
                                   "MOVDI2PDIrr")>;

// MOVQ.
// r64 <- (x)mm.
def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>;

// (x)mm <- r64.
def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;

// (x)mm <- (x)mm.
def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>;

// (V)MOVDQA/U.
// x <- x.
def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr",
                                     "VMOVDQ(A|U)rr",
                                     "MOVDQ(A|U)rr_REV",
                                     "VMOVDQ(A|U)rr_REV",
                                     "VMOVDQ(A|U)Yrr",
                                     "VMOVDQ(A|U)Yrr_REV")>;

// MOVDQ2Q.
def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>;

// MOVQ2DQ.
def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>;


// PACKSSWB/DW.
// mm <- mm.
def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> {
  let NumMicroOps = 3;
  let Latency = 2;
  let ResourceCycles = [3];
}
def : InstRW<[WriteMMXPACKSSrr], (instregex "MMX_PACKSSDWirr",
                                            "MMX_PACKSSWBirr",
                                            "MMX_PACKUSWBirr")>;

// mm <- m64.
def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> {
  let Latency = 4;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 3];
}
def : InstRW<[WriteMMXPACKSSrm], (instregex "MMX_PACKSSDWirm",
                                            "MMX_PACKSSWBirm",
                                            "MMX_PACKUSWBirm")>;

// VPMOVSX/ZX BW BD BQ WD WQ DQ.
// y <- x.
// The suffix alternation enumerates every source/destination element-width
// pair of the extend instructions so that no 256-bit register form falls back
// to the default model.
def WriteVPMOVSX : SchedWriteRes<[HWPort5]> {
  let Latency = 3;
  let NumMicroOps = 1;
}
def : InstRW<[WriteVPMOVSX],
             (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;

// PBLENDW.
// x,x,i / v,v,v,i
def WritePBLENDWr : SchedWriteRes<[HWPort5]>;
def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>;

// x,m,i / v,v,m,i
def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 4;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>;

// VPBLENDD.
// v,v,v,i.
def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>;
def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>;

// v,v,m,i
def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 4;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>;

// MASKMOVQ.
def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> {
  let Latency = 13;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 2];
}
def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>;

// MASKMOVDQU.
def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> {
  let Latency = 14;
  let NumMicroOps = 10;
  let ResourceCycles = [4, 2, 4];
}
def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>;

// VPMASKMOV D/Q.
// v,v,m.
def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 4;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVPMASKMOVr, ReadAfterLd],
             (instregex "VPMASKMOV(D|Q)(Y?)rm")>;

// m, v,v.
def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 13;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;

// PMOVMSKB.
def WritePMOVMSKB : SchedWriteRes<[HWPort0]> { let Latency = 3; }
def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>;

// PEXTR B/W/D/Q.
// r32,x,i.
def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr",
                                       "MMX_PEXTRWirri")>;

// m8,x,i.
def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> {
  let ResourceCycles = [1, 1, 1];
  let NumMicroOps = 3;
}
def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;

// VPBROADCAST B/W.
// x, m8/16.
def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
  let NumMicroOps = 3;
  let Latency = 5;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd],
             (instregex "VPBROADCAST(B|W)rm")>;

// y, m8/16
def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
  let NumMicroOps = 3;
  let Latency = 7;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd],
             (instregex "VPBROADCAST(B|W)Yrm")>;

// VPGATHERDD.
// x.
// The integer gather entries record only a micro-op count; no ports are
// listed for them.
def WriteVPGATHERDD128 : SchedWriteRes<[]> { let NumMicroOps = 20; }
def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>;

// y.
def WriteVPGATHERDD256 : SchedWriteRes<[]> { let NumMicroOps = 34; }
def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>;

// VPGATHERQD.
// x.
def WriteVPGATHERQD128 : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>;

// y.
def WriteVPGATHERQD256 : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>;

// VPGATHERDQ.
// x.
def WriteVPGATHERDQ128 : SchedWriteRes<[]> { let NumMicroOps = 12; }
def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>;

// y.
def WriteVPGATHERDQ256 : SchedWriteRes<[]> { let NumMicroOps = 20; }
def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>;

// VPGATHERQQ.
// x.
def WriteVPGATHERQQ128 : SchedWriteRes<[]> { let NumMicroOps = 14; }
def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>;

// y.
def WriteVPGATHERQQ256 : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;

//-- Arithmetic instructions --//

// PHADD|PHSUB (S) W/D.
// v <- v,v.
def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
  let NumMicroOps = 3;
  let Latency = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[WritePHADDSUBr],
             (instregex "MMX_PHADD(W?)rr64",
                        "MMX_PHADDSWrr64",
                        "MMX_PHSUB(W|D)rr64",
                        "MMX_PHSUBSWrr64",
                        "(V?)PH(ADD|SUB)(W|D)(Y?)rr",
                        "(V?)PH(ADD|SUB)SWrr(256)?")>;

// v <- v,m.
def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WritePHADDSUBm, ReadAfterLd],
             (instregex "MMX_PHADD(W?)rm64",
                        "MMX_PHADDSWrm64",
                        "MMX_PHSUB(W|D)rm64",
                        "MMX_PHSUBSWrm64",
                        "(V?)PH(ADD|SUB)(W|D)(Y?)rm",
                        "(V?)PH(ADD|SUB)SWrm(128|256)?")>;

// PCMPGTQ.
// v <- v,v.
def WritePCMPGTQr : SchedWriteRes<[HWPort0]> {
  let Latency = 5;
  let NumMicroOps = 1;
}
def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;

// v <- v,m.
def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>;

// PMULLD.
// x,x / y,y,y.
def WritePMULLDr : SchedWriteRes<[HWPort0]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>;

// x,m / y,y,m.
def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>;

//-- Logic instructions --//

// PTEST.
// v,v.
def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>;

// v,m.
// The load forms are bound to WritePTESTm so that the extra port 2/3
// micro-op and the longer latency are accounted for.
def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WritePTESTm], (instregex "(V?)PTEST(Y?)rm")>;

// PSLL,PSRL,PSRA W/D/Q.
// x,x / v,v,x.
def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>;

// PSLL,PSRL DQ.
def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>;

//-- Other --//

// EMMS.
def WriteEMMS : SchedWriteRes<[]> {
  let NumMicroOps = 31;
  let Latency = 13;
}
def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>;

//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//

// MOVMSKP S/D.
// r32 <- x.
def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> { let Latency = 3; }
def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>;

// r32 <- y.
def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> { let Latency = 2; }
def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>;

// VPERM2F128.
def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>;
def : InstRW<[WriteFShuffle256Ld, ReadAfterLd],
             (instregex "VPERM2F128rm")>;

// BLENDVP S/D.
def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>;
def : InstRW<[WriteFVarBlendLd, ReadAfterLd],
             (instregex "BLENDVP(S|D)rm0")>;

// VBROADCASTF128.
def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>;

// EXTRACTPS.
// r32,x,i.
def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> {
  let ResourceCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;

// m32,x,i.
def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 4;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;

// VEXTRACTF128.
// x,y,i.
def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>;

// m128,y,i.
def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>;

// VINSERTF128.
// y,y,x,i.
def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>;

// y,y,m128,i.
// The load form is bound to the dedicated WriteVINSERTF128m resource defined
// here rather than to the register-form shuffle write.
def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteVINSERTF128m, ReadAfterLd], (instregex "VINSERTF128rm")>;

// VMASKMOVP S/D.
// v,v,m.
def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>;

// m128,x,x.
def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
  let Latency = 13;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>;

// m256,y,y.
def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
  let Latency = 14;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;

// VGATHERDPS.
// x.
def WriteVGATHERDPS128 : SchedWriteRes<[]> {
  let NumMicroOps = 20;
}
def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>;

// y.
def WriteVGATHERDPS256 : SchedWriteRes<[]> {
  let NumMicroOps = 34;
}
def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>;

// VGATHERQPS.
// x.
// The floating-point gather entries record only a micro-op count; no ports
// are listed for them.
def WriteVGATHERQPS128 : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>;

// y.
def WriteVGATHERQPS256 : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>;

// VGATHERDPD.
// x.
def WriteVGATHERDPD128 : SchedWriteRes<[]> { let NumMicroOps = 12; }
def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>;

// y.
def WriteVGATHERDPD256 : SchedWriteRes<[]> { let NumMicroOps = 20; }
def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>;

// VGATHERQPD.
// x.
def WriteVGATHERQPD128 : SchedWriteRes<[]> { let NumMicroOps = 14; }
def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>;

// y.
def WriteVGATHERQPD256 : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>;

//-- Conversion instructions --//

// CVTPD2PS.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>;

// x,m128.
def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>;

// x,y.
def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> {
  let NumMicroOps = 2;
  let Latency = 5;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>;

// x,m256.
def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 9;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>;

// CVTSD2SS.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>;

// x,m64.
def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>;

// CVTPS2PD.
// x,x.
def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>;

// x,m64.
// y,m128.
def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 5;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>;

// y,x.
def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let Latency = 5;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>;

// CVTSS2SD.
// x,x.
def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;

// x,m32.
def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 5;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>;

// CVTDQ2PD.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>;

// y,x.
def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>;

// CVT(T)PD2DQ.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>;

// x,m128.
def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>;

// x,y.
def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>;

// x,m256.
def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>;

// CVT(T)PS2PI.
// mm,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>;

// CVTPI2PD.
// x,mm.
def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>;

// CVT(T)PD2PI.
// mm,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>;

// CVTSI2SS.
// x,r32.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;

// CVT(T)SS2SI.
// r32,x.
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
// r32,m32.
def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;

// CVTSI2SD.
// x,r32/64.
// The pattern names the SD destination so that this entry covers CVTSI2SD
// and does not overlap the CVTSI2SS entry above.
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>;

// CVTSD2SI.
// r32/64
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>;
// r32,m32.
def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>;

// VCVTPS2PH.
// x,v,i.
def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>;
// m,v,i.
def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>;

// VCVTPH2PS.
// v,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>;

//-- Arithmetic instructions --//

// HADD, HSUB PS/PD
// x,x / v,v,v.
def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>;

// x,m / v,v,m.
def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 9;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>;

// MUL SS/SD PS/PD.
// x,x / v,v,v.
def WriteMULr : SchedWriteRes<[HWPort01]> { let Latency = 5; }
def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;

// x,m / v,v,m.
def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 9;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>;

// VDIVPS.
// y,y,y.
def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let Latency = 19; // 18-21 cycles.
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>;

// y,y,m256.
def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 23; // 18-21 + 4 cycles.
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>;

// VDIVPD.
// y,y,y.
def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let Latency = 27; // 19-35 cycles.
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>;

// y,y,m256.
def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 31; // 19-35 + 4 cycles.
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>;

// VRCPPS.
// y,y.
def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;

// y,m256.
def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 11;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>;

// ROUND SS/SD PS/PD.
// v,v,i.
def WriteROUNDr : SchedWriteRes<[HWPort1]> {
  let NumMicroOps = 2;
  let Latency = 6;
  let ResourceCycles = [2];
}
def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;

// v,m,i.
def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 10;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;

// DPPS.
// x,x,i / v,v,v,i.
def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
  let NumMicroOps = 4;
  let Latency = 14;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>;

// x,m,i / v,v,m,i.
def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> {
  let NumMicroOps = 6;
  let Latency = 18;
  let ResourceCycles = [2, 1, 1, 1, 1];
}
def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>;

// DPPD.
// x,x,i.
def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
  let NumMicroOps = 3;
  let Latency = 9;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>;

// x,m,i.
def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 13;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>;

// VFMADD.
// v,v,v.
def WriteFMADDr : SchedWriteRes<[HWPort01]> {
  let NumMicroOps = 1;
  let Latency = 5;
}
def : InstRW<[WriteFMADDr],
             (instregex
                // 3-operand packed forms.
                "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
                // 3-operand scalar forms.
                "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
                // 4-operand scalar forms, including the _Int variants.
                "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
                // 4-operand packed forms.
                "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;

// v,v,m.
def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 9;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteFMADDm],
             (instregex
                // 3-operand packed forms.
                "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
                // 3-operand scalar forms.
                "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
                // 4-operand scalar forms, including the _Int variants.
                "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
                // 4-operand packed forms.
                "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;

//-- Math instructions --//

// VSQRTPS.
// y,y.
def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let Latency = 19;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>;

// y,m256.
def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 23;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>;

// VSQRTPD.
// y,y.
def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let Latency = 28;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>;

// y,m256.
def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 32;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>;

// RSQRT SS/PS.
// x,x.
def WriteRSQRTr : SchedWriteRes<[HWPort0]> { let Latency = 5; }
def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>;

// x,m128.
def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> {
  let NumMicroOps = 2;
  let Latency = 9;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>;

// RSQRTPS 256.
// y,y.
def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;

// y,m256.
def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 11;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>;

//-- Logic instructions --//

// AND, ANDN, OR, XOR PS/PD.
// x,x / v,v,v.
def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;

// x,m / v,v,m.
def : InstRW<[WriteP5Ld, ReadAfterLd],
             (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;

//-- Other instructions --//

// VZEROUPPER.
def WriteVZEROUPPER : SchedWriteRes<[]> { let NumMicroOps = 4; }
def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>;

// VZEROALL.
def WriteVZEROALL : SchedWriteRes<[]> { let NumMicroOps = 12; }
def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>;

// LDMXCSR.
def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 6;
  let ResourceCycles = [1, 1, 1];
}
def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>;

// STMXCSR.
def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> {
  let NumMicroOps = 4;
  let Latency = 7;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>;

} // SchedModel