//=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// 1. Introduction
//
// This file defines the machine model for Broadcom Vulcan to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// 2. Pipeline Description.

def VulcanModel : SchedMachineModel {
  // 4 micro-ops dispatched at a time.
  let IssueWidth = 4;
  // 180 entries in micro-op re-order buffer.
  let MicroOpBufferSize = 180;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Extra cycles for mispredicted branch.
  let MispredictPenalty = 12;
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 32;
  // Using PostRA sched.
  let PostRAScheduler = 1;
  let CompleteModel = 1;
}

// Define the issue ports.

// Port 0: ALU, FP/SIMD.
def VulcanP0 : ProcResource<1>;

// Port 1: ALU, FP/SIMD, integer mul/div.
def VulcanP1 : ProcResource<1>;

// Port 2: ALU, Branch.
def VulcanP2 : ProcResource<1>;

// Port 3: Store data.
def VulcanP3 : ProcResource<1>;

// Port 4: Load/store.
def VulcanP4 : ProcResource<1>;

// Port 5: Load/store.
def VulcanP5 : ProcResource<1>;

let SchedModel = VulcanModel in {

// Define groups for the functional units on each issue port. Each group
// created here is referenced by a WriteRes / SchedWriteRes later on.
//
// NOTE: Some groups contain only one member. This is a way to give distinct
// names to the different functional units that share a single issue port,
// for example "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on
// port 1.

// Integer divide and multiply micro-ops only on port 1.
def VulcanI1 : ProcResGroup<[VulcanP1]>;

// Branch micro-ops only on port 2.
def VulcanI2 : ProcResGroup<[VulcanP2]>;

// ALU micro-ops on ports 0, 1, and 2.
def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>;

// Crypto FP/SIMD micro-ops only on port 1.
def VulcanF1 : ProcResGroup<[VulcanP1]>;

// FP/SIMD micro-ops on ports 0 and 1.
def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>;

// Store data micro-ops only on port 3.
def VulcanSD : ProcResGroup<[VulcanP3]>;

// Load/store micro-ops on ports 4 and 5.
def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>;

// 60 entry unified scheduler.
def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2,
                              VulcanP3, VulcanP4, VulcanP5]> {
  let BufferSize = 60;
}

// Define commonly used write types for InstRW specializations.
// All definitions follow the format: VulcanWrite_<NumCycles>Cyc_<Resources>.

// 3 cycles on I1.
def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> {
  let Latency = 3;
}

// 4 cycles on I1.
def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> {
  let Latency = 4;
}

// 1 cycle on I0, I1, or I2.
def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> {
  let Latency = 1;
}

// 5 cycles on F1.
def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> {
  let Latency = 5;
}

// 7 cycles on F1.
def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> {
  let Latency = 7;
}

// 4 cycles on F0 or F1.
def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let Latency = 4;
}

// 5 cycles on F0 or F1.
def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let Latency = 5;
}

// 6 cycles on F0 or F1.
def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let Latency = 6;
}

// 7 cycles on F0 or F1.
def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let Latency = 7;
}

// 8 cycles on F0 or F1.
def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let Latency = 8;
}

// 16 cycles on F0 or F1 (resource held for 8 cycles).
def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let ResourceCycles = [8];
  let Latency = 16;
}

// 23 cycles on F0 or F1 (resource held for 11 cycles).
def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> {
  let ResourceCycles = [11];
  let Latency = 23;
}

// 1 cycle on LS0 or LS1.
def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> {
  let Latency = 1;
}

// 4 cycles on LS0 or LS1.
def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> {
  let Latency = 4;
}

// 5 cycles on LS0 or LS1.
def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> {
  let Latency = 5;
}

// 6 cycles on LS0 or LS1.
def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> {
  let Latency = 6;
}

// 5 cycles on LS0 or LS1 and I0, I1, or I2.
def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> {
  let NumMicroOps = 2;
  let Latency = 5;
}

// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
def VulcanWrite_6Cyc_LS01_I012_I012 :
    SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> {
  let NumMicroOps = 3;
  let Latency = 6;
}

// 1 cycle on LS0 or LS1 and F0 or F1.
def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
  let NumMicroOps = 2;
  let Latency = 1;
}

// 5 cycles on LS0 or LS1 and F0 or F1.
def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
  let NumMicroOps = 2;
  let Latency = 5;
}

// 6 cycles on LS0 or LS1 and F0 or F1.
def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
  let NumMicroOps = 2;
  let Latency = 6;
}

// 7 cycles on LS0 or LS1 and F0 or F1.
def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
  let NumMicroOps = 2;
  let Latency = 7;
}

// 8 cycles on LS0 or LS1 and F0 or F1.
def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
  let NumMicroOps = 2;
  let Latency = 8;
}

// Define commonly used read types.

// No forwarding is provided for these types.
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
def : ReadAdvance<ReadIMA, 0>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;

}


//===----------------------------------------------------------------------===//
// 3. Instruction Tables.

let SchedModel = VulcanModel in {

//---
// 3.1 Branch Instructions
//---

// Branch, immed
// Branch and link, immed
// Compare and branch
def : WriteRes<WriteBr, [VulcanI2]> {
  let Latency = 1;
}

// System, barrier and hint writes consume no modelled resource.
def : WriteRes<WriteSys, []> {
  let Latency = 1;
}
def : WriteRes<WriteBarrier, []> {
  let Latency = 1;
}
def : WriteRes<WriteHint, []> {
  let Latency = 1;
}

// Atomics are marked as unsupported in this model.
def : WriteRes<WriteAtomic, []> {
  let Unsupported = 1;
}

// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
def : WriteRes<WriteBrReg, [VulcanI2]> {
  let Latency = 1;
}

//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
def : WriteRes<WriteI, [VulcanI012]> {
  let Latency = 1;
}
def : InstRW<[WriteI],
             (instrs COPY)>;

// ALU, extend and/or shift
def : WriteRes<WriteISReg, [VulcanI012]> {
  let ResourceCycles = [2];
  let Latency = 2;
}

def : WriteRes<WriteIEReg, [VulcanI012]> {
  let ResourceCycles = [2];
  let Latency = 2;
}

// Move immed
def : WriteRes<WriteImm, [VulcanI012]> {
  let Latency = 1;
}

// Variable shift
def : WriteRes<WriteIS, [VulcanI012]> {
  let Latency = 1;
}

//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form
// Latency range of 13-23. Take the average.
def : WriteRes<WriteID32, [VulcanI1]> {
  let ResourceCycles = [18];
  let Latency = 18;
}

// Divide, X-form
// Latency range of 13-39. Take the average.
def : WriteRes<WriteID64, [VulcanI1]> {
  let ResourceCycles = [26];
  let Latency = 26;
}

// NOTE(review): the port description in this file lists integer multiply on
// port 1 only, yet the two multiply writes below use VulcanI012 rather than
// VulcanI1 -- confirm which is intended.

// Multiply accumulate, W-form
def : WriteRes<WriteIM32, [VulcanI012]> {
  let Latency = 5;
}

// Multiply accumulate, X-form
def : WriteRes<WriteIM64, [VulcanI012]> {
  let Latency = 5;
}

// Bitfield extract, two reg
def : WriteRes<WriteExtr, [VulcanI012]> {
  let Latency = 1;
}

// Bitfield move, basic
// Bitfield move, insert
// NOTE: Handled by WriteIS.

// Count leading
def : InstRW<[VulcanWrite_3Cyc_I1],
             (instregex "^CLS(W|X)r$",
                        "^CLZ(W|X)r$")>;

// Reverse bits/bytes
// NOTE: Handled by WriteI.

//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
def : WriteRes<WriteLD, [VulcanLS01]> {
  let Latency = 4;
}

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [VulcanI012]> {
  let Latency = 1;
}

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
//
// Scaled-index forms use the 6-cycle, 3 micro-op write; every other form
// falls back to the 5-cycle, 2 micro-op write.
def VulcanWriteLDIdx : SchedWriteVariant<[
  SchedVar<ScaledIdxPred, [VulcanWrite_6Cyc_LS01_I012_I012]>,
  SchedVar<NoSchedPred,   [VulcanWrite_5Cyc_LS01_I012]>
]>;
def : SchedAlias<WriteLDIdx, VulcanWriteLDIdx>;

// Both variants resolve to ReadDefault, so the predicate currently makes no
// difference to the selected read.
def VulcanReadAdrBase : SchedReadVariant<[
  SchedVar<ScaledIdxPred, [ReadDefault]>,
  SchedVar<NoSchedPred,   [ReadDefault]>
]>;
def : SchedAlias<ReadAdrBase, VulcanReadAdrBase>;

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op. Thus
// the resources are handled by WriteLD.
def : WriteRes<WriteLDHi, []> {
  let Latency = 5;
}

// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

//--
// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
  let NumMicroOps = 2;
  let Latency = 1;
}

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [VulcanLS01, VulcanSD, VulcanI012]> {
  let NumMicroOps = 3;
  let Latency = 1;
}

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
  let NumMicroOps = 2;
  let Latency = 1;
}

// Store pair, immed post-index, W-form
// Store pair, immed post-index, X-form
// Store pair, immed pre-index, W-form
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.

//---
// 3.8 FP Data Processing Instructions
//---

// FP absolute value
// FP min/max
// FP negate
def : WriteRes<WriteF, [VulcanF01]> {
  let Latency = 5;
}

// FP arithmetic
// NOTE(review): these unanchored patterns also match the vector forms
// ("^FADDv", "^FADDPv", "^FSUBv") that get their own InstRW entries with the
// same write in section 3.13.
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^FADD", "^FSUB")>;

// FP compare
def : WriteRes<WriteFCmp, [VulcanF01]> {
  let Latency = 5;
}

// FP divide, S-form
// FP square root, S-form
def : WriteRes<WriteFDiv, [VulcanF01]> {
  let ResourceCycles = [8];
  let Latency = 16;
}

// FP divide, D-form
// FP square root, D-form
def : InstRW<[VulcanWrite_23Cyc_F01],
             (instrs FDIVDrr, FSQRTDr)>;

// FP multiply
// FP multiply accumulate
def : WriteRes<WriteFMul, [VulcanF01]> {
  let Latency = 6;
}

// FP round to integral
def : InstRW<[VulcanWrite_7Cyc_F01],
             (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[VulcanWrite_4Cyc_F01],
             (instregex "^FCSEL")>;

//---
// 3.9 FP Miscellaneous Instructions
//---

// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
def : WriteRes<WriteFCvt, [VulcanF01]> {
  let Latency = 7;
}

// FP move, immed
// FP move, register
def : WriteRes<WriteFImm, [VulcanF01]> {
  let Latency = 4;
}

// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
def : WriteRes<WriteFCopy, [VulcanF01]> {
  let Latency = 4;
}
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instrs FMOVXDHighr, FMOVDXHighr)>;

//---
// 3.12 ASIMD Integer Instructions
//---

// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
def : WriteRes<WriteV, [VulcanF01]> {
  let Latency = 7;
}

// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD logical (MOV, MVN, ORN, ORR)
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD polynomial (8x8) multiply long
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instrs PMULLv8i8, PMULLv16i8)>;

//---
// 3.13 ASIMD Floating-point Instructions
//---

// ASIMD FP absolute value
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FCMEQv", "^FCMGEv",
                        "^FCMGTv", "^FCMLEv",
                        "^FCMLTv")>;

// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form
// NOTE: Handled by WriteV.

// ASIMD FP divide, D-form, F32
def : InstRW<[VulcanWrite_16Cyc_F01],
             (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[VulcanWrite_16Cyc_F01],
             (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[VulcanWrite_23Cyc_F01],
             (instrs FDIVv2f64)>;

// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FMAXv", "^FMAXNMv",
                        "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FMAXPv", "^FMAXNMPv",
                        "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FMAXVv", "^FMAXNMVv",
                        "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^FMULv", "^FMULXv")>;

// ASIMD FP multiply accumulate, D-form, FZ
// ASIMD FP multiply accumulate, D-form, no FZ
// ASIMD FP multiply accumulate, Q-form, FZ
// ASIMD FP multiply accumulate, Q-form, no FZ
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^FMLAv", "^FMLSv")>;

// ASIMD FP negate
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FNEGv")>;

// ASIMD FP round, D-form
// ASIMD FP round, Q-form
// NOTE: Handled by WriteV.

//--
// 3.14 ASIMD Miscellaneous Instructions
//--

// ASIMD bit reverse
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^DUPv")>;

// ASIMD extract
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^EXTv")>;

// ASIMD extract narrow
// ASIMD extract narrow, saturating
// NOTE: Handled by WriteV.

// ASIMD insert, element to element
// NOTE(review): "^INSv" is listed again under "ASIMD transfer gen reg to
// element" below with the same write.
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^INSv")>;

// ASIMD move, integer immed
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^MOVIv", "^MOVIDv")>;

// ASIMD move, FP immed
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FMOVv")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
                        "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
def : InstRW<[VulcanWrite_8Cyc_F01],
             (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to word or word
// NOTE(review): "^UMOVv" is matched both here (5 cycles) and by the next
// InstRW (6 cycles) -- confirm which latency is intended for UMOV.
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^UMOVv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[VulcanWrite_6Cyc_F01],
             (instregex "^SMOVv", "^UMOVv")>;

// ASIMD transfer gen reg to element
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^INSv")>;

// ASIMD transpose
// (The UZP1/UZP2 patterns are listed here as well.)
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^TRN1v", "^TRN2v",
                        "^UZP1v", "^UZP2v")>;

// ASIMD unzip/zip
def : InstRW<[VulcanWrite_5Cyc_F01],
             (instregex "^ZIP1v", "^ZIP2v")>;

//--
// 3.15 ASIMD Load Instructions
//--
// Each load below has two entries: the plain form, and the "_POST" form,
// which additionally lists WriteAdr.

// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[VulcanWrite_4Cyc_LS01],
             (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
             (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[VulcanWrite_4Cyc_LS01],
             (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
             (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[VulcanWrite_5Cyc_LS01],
             (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr],
             (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[VulcanWrite_6Cyc_LS01],
             (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr],
             (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
             (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
             (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
             (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
             (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[VulcanWrite_7Cyc_LS01_F01],
             (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
             (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[VulcanWrite_7Cyc_LS01_F01],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[VulcanWrite_6Cyc_LS01_F01],
             (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
             (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[VulcanWrite_6Cyc_LS01_F01],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

//--
// 3.16 ASIMD Store Instructions
//--
// As with the loads, every store has a plain entry and a "_POST" entry that
// additionally lists WriteAdr.

// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[VulcanWrite_1Cyc_LS01],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[VulcanWrite_1Cyc_LS01],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[VulcanWrite_1Cyc_LS01],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[VulcanWrite_1Cyc_LS01],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
             (instregex "^ST4i(8|16|32|64)$")>;
def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
             (instregex "^ST4i(8|16|32|64)_POST$")>;

//--
// 3.17 Cryptography Extensions
//--

// Crypto AES ops
def : InstRW<[VulcanWrite_5Cyc_F1],
             (instregex "^AES")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[VulcanWrite_5Cyc_F1],
             (instrs PMULLv1i64, PMULLv2i64)>;

// Crypto SHA1 xor ops
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration op (1 u-op)
// Crypto SHA256 schedule acceleration op (2 u-ops)
// Crypto SHA256 hash acceleration ops
def : InstRW<[VulcanWrite_7Cyc_F1],
             (instregex "^SHA")>;

//--
// 3.18 CRC
//--

// CRC checksum ops
def : InstRW<[VulcanWrite_4Cyc_I1],
             (instregex "^CRC32")>;

} // SchedModel = VulcanModel