1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the AArch64 implementation of the TargetInstrInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64InstrInfo.h" 15 #include "AArch64MachineFunctionInfo.h" 16 #include "AArch64Subtarget.h" 17 #include "MCTargetDesc/AArch64AddressingModes.h" 18 #include "Utils/AArch64BaseInfo.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/CodeGen/MachineBasicBlock.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineMemOperand.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/MachineModuleInfo.h" 31 #include "llvm/CodeGen/StackMaps.h" 32 #include "llvm/CodeGen/TargetRegisterInfo.h" 33 #include "llvm/CodeGen/TargetSubtargetInfo.h" 34 #include "llvm/IR/DebugLoc.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/MC/MCInst.h" 37 #include "llvm/MC/MCInstrDesc.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CodeGen.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Compiler.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Target/TargetMachine.h" 45 #include "llvm/Target/TargetOptions.h" 46 #include <cassert> 47 #include <cstdint> 48 #include <iterator> 49 #include <utility> 50 51 using namespace llvm; 52 53 #define GET_INSTRINFO_CTOR_DTOR 54 #include "AArch64GenInstrInfo.inc" 55 56 static cl::opt<unsigned> TBZDisplacementBits( 57 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), 58 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); 59 60 static cl::opt<unsigned> CBZDisplacementBits( 61 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), 62 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); 63 64 static cl::opt<unsigned> 65 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), 66 cl::desc("Restrict range of Bcc instructions (DEBUG)")); 67 68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) 69 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), 70 RI(STI.getTargetTriple()), Subtarget(STI) {} 71 72 /// GetInstSize - Return the number of bytes of code the specified 73 /// instruction may be. This returns the maximum number of bytes. 74 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 75 const MachineBasicBlock &MBB = *MI.getParent(); 76 const MachineFunction *MF = MBB.getParent(); 77 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 78 79 if (MI.getOpcode() == AArch64::INLINEASM) 80 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); 81 82 // FIXME: We currently only handle pseudoinstructions that don't get expanded 83 // before the assembly printer. 84 unsigned NumBytes = 0; 85 const MCInstrDesc &Desc = MI.getDesc(); 86 switch (Desc.getOpcode()) { 87 default: 88 // Anything not explicitly designated otherwise is a normal 4-byte insn. 
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
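//
// For reference, the condition operands produced by parseCondBranch() above
// (and consumed by reverseBranchCondition, insertBranch and insertSelect
// below) use the following layout. This is a descriptive summary of the code
// above, not an additional contract:
//
//   Bcc          -> Cond = { <cc> }
//   CB[N]Z[WX]   -> Cond = { -1, <opcode>, <reg> }
//   TB[N]Z[WX]   -> Cond = { -1, <opcode>, <reg>, <bit #> }
//
// e.g. an illustrative "tbnz w0, #3, <bb>" is recorded as { -1, TBNZW, w0, 3 }.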
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only remaining terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
282 return true; 283 } 284 285 bool AArch64InstrInfo::reverseBranchCondition( 286 SmallVectorImpl<MachineOperand> &Cond) const { 287 if (Cond[0].getImm() != -1) { 288 // Regular Bcc 289 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 290 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); 291 } else { 292 // Folded compare-and-branch 293 switch (Cond[1].getImm()) { 294 default: 295 llvm_unreachable("Unknown conditional branch!"); 296 case AArch64::CBZW: 297 Cond[1].setImm(AArch64::CBNZW); 298 break; 299 case AArch64::CBNZW: 300 Cond[1].setImm(AArch64::CBZW); 301 break; 302 case AArch64::CBZX: 303 Cond[1].setImm(AArch64::CBNZX); 304 break; 305 case AArch64::CBNZX: 306 Cond[1].setImm(AArch64::CBZX); 307 break; 308 case AArch64::TBZW: 309 Cond[1].setImm(AArch64::TBNZW); 310 break; 311 case AArch64::TBNZW: 312 Cond[1].setImm(AArch64::TBZW); 313 break; 314 case AArch64::TBZX: 315 Cond[1].setImm(AArch64::TBNZX); 316 break; 317 case AArch64::TBNZX: 318 Cond[1].setImm(AArch64::TBZX); 319 break; 320 } 321 } 322 323 return false; 324 } 325 326 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, 327 int *BytesRemoved) const { 328 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 329 if (I == MBB.end()) 330 return 0; 331 332 if (!isUncondBranchOpcode(I->getOpcode()) && 333 !isCondBranchOpcode(I->getOpcode())) 334 return 0; 335 336 // Remove the branch. 337 I->eraseFromParent(); 338 339 I = MBB.end(); 340 341 if (I == MBB.begin()) { 342 if (BytesRemoved) 343 *BytesRemoved = 4; 344 return 1; 345 } 346 --I; 347 if (!isCondBranchOpcode(I->getOpcode())) { 348 if (BytesRemoved) 349 *BytesRemoved = 4; 350 return 1; 351 } 352 353 // Remove the branch. 354 I->eraseFromParent(); 355 if (BytesRemoved) 356 *BytesRemoved = 8; 357 358 return 2; 359 } 360 361 void AArch64InstrInfo::instantiateCondBranch( 362 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, 363 ArrayRef<MachineOperand> Cond) const { 364 if (Cond[0].getImm() != -1) { 365 // Regular Bcc 366 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); 367 } else { 368 // Folded compare-and-branch 369 // Note that we use addOperand instead of addReg to keep the flags. 370 const MachineInstrBuilder MIB = 371 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]); 372 if (Cond.size() > 3) 373 MIB.addImm(Cond[3].getImm()); 374 MIB.addMBB(TBB); 375 } 376 } 377 378 unsigned AArch64InstrInfo::insertBranch( 379 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 380 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { 381 // Shouldn't be a fall through. 382 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 383 384 if (!FBB) { 385 if (Cond.empty()) // Unconditional branch? 386 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); 387 else 388 instantiateCondBranch(MBB, DL, TBB, Cond); 389 390 if (BytesAdded) 391 *BytesAdded = 4; 392 393 return 1; 394 } 395 396 // Two-way conditional branch. 397 instantiateCondBranch(MBB, DL, TBB, Cond); 398 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); 399 400 if (BytesAdded) 401 *BytesAdded = 8; 402 403 return 2; 404 } 405 406 // Find the original register that VReg is copied from. 
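// For example, with the hypothetical copy chain
//   %2 = COPY %1
//   %1 = COPY %0
// removeCopies(MRI, %2) walks the full copies back and returns %0; the walk
// stops at the first def that is not a full copy, or once the register is no
// longer virtual.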
407 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { 408 while (TargetRegisterInfo::isVirtualRegister(VReg)) { 409 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 410 if (!DefMI->isFullCopy()) 411 return VReg; 412 VReg = DefMI->getOperand(1).getReg(); 413 } 414 return VReg; 415 } 416 417 // Determine if VReg is defined by an instruction that can be folded into a 418 // csel instruction. If so, return the folded opcode, and the replacement 419 // register. 420 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, 421 unsigned *NewVReg = nullptr) { 422 VReg = removeCopies(MRI, VReg); 423 if (!TargetRegisterInfo::isVirtualRegister(VReg)) 424 return 0; 425 426 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); 427 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 428 unsigned Opc = 0; 429 unsigned SrcOpNum = 0; 430 switch (DefMI->getOpcode()) { 431 case AArch64::ADDSXri: 432 case AArch64::ADDSWri: 433 // if NZCV is used, do not fold. 434 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 435 return 0; 436 // fall-through to ADDXri and ADDWri. 437 LLVM_FALLTHROUGH; 438 case AArch64::ADDXri: 439 case AArch64::ADDWri: 440 // add x, 1 -> csinc. 441 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || 442 DefMI->getOperand(3).getImm() != 0) 443 return 0; 444 SrcOpNum = 1; 445 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; 446 break; 447 448 case AArch64::ORNXrr: 449 case AArch64::ORNWrr: { 450 // not x -> csinv, represented as orn dst, xzr, src. 451 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 452 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 453 return 0; 454 SrcOpNum = 2; 455 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; 456 break; 457 } 458 459 case AArch64::SUBSXrr: 460 case AArch64::SUBSWrr: 461 // if NZCV is used, do not fold. 462 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 463 return 0; 464 // fall-through to SUBXrr and SUBWrr. 465 LLVM_FALLTHROUGH; 466 case AArch64::SUBXrr: 467 case AArch64::SUBWrr: { 468 // neg x -> csneg, represented as sub dst, xzr, src. 469 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 470 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 471 return 0; 472 SrcOpNum = 2; 473 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr; 474 break; 475 } 476 default: 477 return 0; 478 } 479 assert(Opc && SrcOpNum && "Missing parameters"); 480 481 if (NewVReg) 482 *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); 483 return Opc; 484 } 485 486 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, 487 ArrayRef<MachineOperand> Cond, 488 unsigned TrueReg, unsigned FalseReg, 489 int &CondCycles, int &TrueCycles, 490 int &FalseCycles) const { 491 // Check register classes. 492 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 493 const TargetRegisterClass *RC = 494 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 495 if (!RC) 496 return false; 497 498 // Expanding cbz/tbz requires an extra cycle of latency on the condition. 499 unsigned ExtraCondLat = Cond.size() != 1; 500 501 // GPRs are handled by csel. 502 // FIXME: Fold in x+1, -x, and ~x when applicable. 503 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || 504 AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 505 // Single-cycle csel, csinc, csinv, and csneg. 
506 CondCycles = 1 + ExtraCondLat; 507 TrueCycles = FalseCycles = 1; 508 if (canFoldIntoCSel(MRI, TrueReg)) 509 TrueCycles = 0; 510 else if (canFoldIntoCSel(MRI, FalseReg)) 511 FalseCycles = 0; 512 return true; 513 } 514 515 // Scalar floating point is handled by fcsel. 516 // FIXME: Form fabs, fmin, and fmax when applicable. 517 if (AArch64::FPR64RegClass.hasSubClassEq(RC) || 518 AArch64::FPR32RegClass.hasSubClassEq(RC)) { 519 CondCycles = 5 + ExtraCondLat; 520 TrueCycles = FalseCycles = 2; 521 return true; 522 } 523 524 // Can't do vectors. 525 return false; 526 } 527 528 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, 529 MachineBasicBlock::iterator I, 530 const DebugLoc &DL, unsigned DstReg, 531 ArrayRef<MachineOperand> Cond, 532 unsigned TrueReg, unsigned FalseReg) const { 533 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 534 535 // Parse the condition code, see parseCondBranch() above. 536 AArch64CC::CondCode CC; 537 switch (Cond.size()) { 538 default: 539 llvm_unreachable("Unknown condition opcode in Cond"); 540 case 1: // b.cc 541 CC = AArch64CC::CondCode(Cond[0].getImm()); 542 break; 543 case 3: { // cbz/cbnz 544 // We must insert a compare against 0. 545 bool Is64Bit; 546 switch (Cond[1].getImm()) { 547 default: 548 llvm_unreachable("Unknown branch opcode in Cond"); 549 case AArch64::CBZW: 550 Is64Bit = false; 551 CC = AArch64CC::EQ; 552 break; 553 case AArch64::CBZX: 554 Is64Bit = true; 555 CC = AArch64CC::EQ; 556 break; 557 case AArch64::CBNZW: 558 Is64Bit = false; 559 CC = AArch64CC::NE; 560 break; 561 case AArch64::CBNZX: 562 Is64Bit = true; 563 CC = AArch64CC::NE; 564 break; 565 } 566 unsigned SrcReg = Cond[2].getReg(); 567 if (Is64Bit) { 568 // cmp reg, #0 is actually subs xzr, reg, #0. 569 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); 570 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) 571 .addReg(SrcReg) 572 .addImm(0) 573 .addImm(0); 574 } else { 575 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); 576 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) 577 .addReg(SrcReg) 578 .addImm(0) 579 .addImm(0); 580 } 581 break; 582 } 583 case 4: { // tbz/tbnz 584 // We must insert a tst instruction. 585 switch (Cond[1].getImm()) { 586 default: 587 llvm_unreachable("Unknown branch opcode in Cond"); 588 case AArch64::TBZW: 589 case AArch64::TBZX: 590 CC = AArch64CC::EQ; 591 break; 592 case AArch64::TBNZW: 593 case AArch64::TBNZX: 594 CC = AArch64CC::NE; 595 break; 596 } 597 // cmp reg, #foo is actually ands xzr, reg, #1<<foo. 
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
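// As a rough illustration (a summary of the opcode checks below, not a new
// contract): add/sub with an unshifted immediate, logical operations on
// immediates or unshifted registers, and MOVi32imm/MOVi64imm that fit a single
// ORR-immediate encoding are treated as no more expensive than a plain
// register move.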
674 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { 675 if (!Subtarget.hasCustomCheapAsMoveHandling()) 676 return MI.isAsCheapAsAMove(); 677 678 if (Subtarget.hasExynosCheapAsMoveHandling()) { 679 if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI)) 680 return true; 681 else 682 return MI.isAsCheapAsAMove(); 683 } 684 685 switch (MI.getOpcode()) { 686 default: 687 return false; 688 689 // add/sub on register without shift 690 case AArch64::ADDWri: 691 case AArch64::ADDXri: 692 case AArch64::SUBWri: 693 case AArch64::SUBXri: 694 return (MI.getOperand(3).getImm() == 0); 695 696 // logical ops on immediate 697 case AArch64::ANDWri: 698 case AArch64::ANDXri: 699 case AArch64::EORWri: 700 case AArch64::EORXri: 701 case AArch64::ORRWri: 702 case AArch64::ORRXri: 703 return true; 704 705 // logical ops on register without shift 706 case AArch64::ANDWrr: 707 case AArch64::ANDXrr: 708 case AArch64::BICWrr: 709 case AArch64::BICXrr: 710 case AArch64::EONWrr: 711 case AArch64::EONXrr: 712 case AArch64::EORWrr: 713 case AArch64::EORXrr: 714 case AArch64::ORNWrr: 715 case AArch64::ORNXrr: 716 case AArch64::ORRWrr: 717 case AArch64::ORRXrr: 718 return true; 719 720 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or 721 // ORRXri, it is as cheap as MOV 722 case AArch64::MOVi32imm: 723 return canBeExpandedToORR(MI, 32); 724 case AArch64::MOVi64imm: 725 return canBeExpandedToORR(MI, 64); 726 727 // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing 728 // feature. 729 case AArch64::FMOVH0: 730 case AArch64::FMOVS0: 731 case AArch64::FMOVD0: 732 return Subtarget.hasZeroCycleZeroing(); 733 case TargetOpcode::COPY: 734 return (Subtarget.hasZeroCycleZeroing() && 735 (MI.getOperand(1).getReg() == AArch64::WZR || 736 MI.getOperand(1).getReg() == AArch64::XZR)); 737 } 738 739 llvm_unreachable("Unknown opcode to check as cheap as a move!"); 740 } 741 742 bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const { 743 unsigned Reg, Imm, Shift; 744 745 switch (MI.getOpcode()) { 746 default: 747 return false; 748 749 // MOV Rd, SP 750 case AArch64::ADDWri: 751 case AArch64::ADDXri: 752 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm()) 753 return false; 754 755 Reg = MI.getOperand(1).getReg(); 756 Imm = MI.getOperand(2).getImm(); 757 return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0); 758 759 // Literal 760 case AArch64::ADR: 761 case AArch64::ADRP: 762 return true; 763 764 // MOVI Vd, #0 765 case AArch64::MOVID: 766 case AArch64::MOVIv8b_ns: 767 case AArch64::MOVIv2d_ns: 768 case AArch64::MOVIv16b_ns: 769 Imm = MI.getOperand(1).getImm(); 770 return (Imm == 0); 771 772 // MOVI Vd, #0 773 case AArch64::MOVIv2i32: 774 case AArch64::MOVIv4i16: 775 case AArch64::MOVIv4i32: 776 case AArch64::MOVIv8i16: 777 Imm = MI.getOperand(1).getImm(); 778 Shift = MI.getOperand(2).getImm(); 779 return (Imm == 0 && Shift == 0); 780 781 // MOV Rd, Imm 782 case AArch64::MOVNWi: 783 case AArch64::MOVNXi: 784 785 // MOV Rd, Imm 786 case AArch64::MOVZWi: 787 case AArch64::MOVZXi: 788 return true; 789 790 // MOV Rd, Imm 791 case AArch64::ORRWri: 792 case AArch64::ORRXri: 793 if (!MI.getOperand(1).isReg()) 794 return false; 795 796 Reg = MI.getOperand(1).getReg(); 797 Imm = MI.getOperand(2).getImm(); 798 return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0); 799 800 // MOV Rd, Rm 801 case AArch64::ORRWrs: 802 case AArch64::ORRXrs: 803 if (!MI.getOperand(1).isReg()) 804 return false; 805 806 Reg = MI.getOperand(1).getReg(); 807 Imm = 
MI.getOperand(3).getImm(); 808 Shift = AArch64_AM::getShiftValue(Imm); 809 return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0); 810 } 811 } 812 813 bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const { 814 unsigned Imm, Shift; 815 AArch64_AM::ShiftExtendType Ext; 816 817 switch (MI.getOpcode()) { 818 default: 819 return false; 820 821 // WriteI 822 case AArch64::ADDSWri: 823 case AArch64::ADDSXri: 824 case AArch64::ADDWri: 825 case AArch64::ADDXri: 826 case AArch64::SUBSWri: 827 case AArch64::SUBSXri: 828 case AArch64::SUBWri: 829 case AArch64::SUBXri: 830 return true; 831 832 // WriteISReg 833 case AArch64::ADDSWrs: 834 case AArch64::ADDSXrs: 835 case AArch64::ADDWrs: 836 case AArch64::ADDXrs: 837 case AArch64::ANDSWrs: 838 case AArch64::ANDSXrs: 839 case AArch64::ANDWrs: 840 case AArch64::ANDXrs: 841 case AArch64::BICSWrs: 842 case AArch64::BICSXrs: 843 case AArch64::BICWrs: 844 case AArch64::BICXrs: 845 case AArch64::EONWrs: 846 case AArch64::EONXrs: 847 case AArch64::EORWrs: 848 case AArch64::EORXrs: 849 case AArch64::ORNWrs: 850 case AArch64::ORNXrs: 851 case AArch64::ORRWrs: 852 case AArch64::ORRXrs: 853 case AArch64::SUBSWrs: 854 case AArch64::SUBSXrs: 855 case AArch64::SUBWrs: 856 case AArch64::SUBXrs: 857 Imm = MI.getOperand(3).getImm(); 858 Shift = AArch64_AM::getShiftValue(Imm); 859 Ext = AArch64_AM::getShiftType(Imm); 860 return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL)); 861 862 // WriteIEReg 863 case AArch64::ADDSWrx: 864 case AArch64::ADDSXrx: 865 case AArch64::ADDSXrx64: 866 case AArch64::ADDWrx: 867 case AArch64::ADDXrx: 868 case AArch64::ADDXrx64: 869 case AArch64::SUBSWrx: 870 case AArch64::SUBSXrx: 871 case AArch64::SUBSXrx64: 872 case AArch64::SUBWrx: 873 case AArch64::SUBXrx: 874 case AArch64::SUBXrx64: 875 Imm = MI.getOperand(3).getImm(); 876 Shift = AArch64_AM::getArithShiftValue(Imm); 877 Ext = AArch64_AM::getArithExtendType(Imm); 878 return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX)); 879 880 case AArch64::PRFMroW: 881 case AArch64::PRFMroX: 882 883 // WriteLDIdx 884 case AArch64::LDRBBroW: 885 case AArch64::LDRBBroX: 886 case AArch64::LDRHHroW: 887 case AArch64::LDRHHroX: 888 case AArch64::LDRSBWroW: 889 case AArch64::LDRSBWroX: 890 case AArch64::LDRSBXroW: 891 case AArch64::LDRSBXroX: 892 case AArch64::LDRSHWroW: 893 case AArch64::LDRSHWroX: 894 case AArch64::LDRSHXroW: 895 case AArch64::LDRSHXroX: 896 case AArch64::LDRSWroW: 897 case AArch64::LDRSWroX: 898 case AArch64::LDRWroW: 899 case AArch64::LDRWroX: 900 case AArch64::LDRXroW: 901 case AArch64::LDRXroX: 902 903 case AArch64::LDRBroW: 904 case AArch64::LDRBroX: 905 case AArch64::LDRDroW: 906 case AArch64::LDRDroX: 907 case AArch64::LDRHroW: 908 case AArch64::LDRHroX: 909 case AArch64::LDRSroW: 910 case AArch64::LDRSroX: 911 912 // WriteSTIdx 913 case AArch64::STRBBroW: 914 case AArch64::STRBBroX: 915 case AArch64::STRHHroW: 916 case AArch64::STRHHroX: 917 case AArch64::STRWroW: 918 case AArch64::STRWroX: 919 case AArch64::STRXroW: 920 case AArch64::STRXroX: 921 922 case AArch64::STRBroW: 923 case AArch64::STRBroX: 924 case AArch64::STRDroW: 925 case AArch64::STRDroX: 926 case AArch64::STRHroW: 927 case AArch64::STRHroX: 928 case AArch64::STRSroW: 929 case AArch64::STRSroX: 930 Imm = MI.getOperand(3).getImm(); 931 Ext = AArch64_AM::getMemExtendType(Imm); 932 return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX); 933 } 934 } 935 936 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const { 937 switch (MI.getOpcode()) { 938 
default: 939 return false; 940 941 case AArch64::ADDWrs: 942 case AArch64::ADDXrs: 943 case AArch64::ADDSWrs: 944 case AArch64::ADDSXrs: { 945 unsigned Imm = MI.getOperand(3).getImm(); 946 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 947 if (ShiftVal == 0) 948 return true; 949 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; 950 } 951 952 case AArch64::ADDWrx: 953 case AArch64::ADDXrx: 954 case AArch64::ADDXrx64: 955 case AArch64::ADDSWrx: 956 case AArch64::ADDSXrx: 957 case AArch64::ADDSXrx64: { 958 unsigned Imm = MI.getOperand(3).getImm(); 959 switch (AArch64_AM::getArithExtendType(Imm)) { 960 default: 961 return false; 962 case AArch64_AM::UXTB: 963 case AArch64_AM::UXTH: 964 case AArch64_AM::UXTW: 965 case AArch64_AM::UXTX: 966 return AArch64_AM::getArithShiftValue(Imm) <= 4; 967 } 968 } 969 970 case AArch64::SUBWrs: 971 case AArch64::SUBSWrs: { 972 unsigned Imm = MI.getOperand(3).getImm(); 973 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 974 return ShiftVal == 0 || 975 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); 976 } 977 978 case AArch64::SUBXrs: 979 case AArch64::SUBSXrs: { 980 unsigned Imm = MI.getOperand(3).getImm(); 981 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 982 return ShiftVal == 0 || 983 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); 984 } 985 986 case AArch64::SUBWrx: 987 case AArch64::SUBXrx: 988 case AArch64::SUBXrx64: 989 case AArch64::SUBSWrx: 990 case AArch64::SUBSXrx: 991 case AArch64::SUBSXrx64: { 992 unsigned Imm = MI.getOperand(3).getImm(); 993 switch (AArch64_AM::getArithExtendType(Imm)) { 994 default: 995 return false; 996 case AArch64_AM::UXTB: 997 case AArch64_AM::UXTH: 998 case AArch64_AM::UXTW: 999 case AArch64_AM::UXTX: 1000 return AArch64_AM::getArithShiftValue(Imm) == 0; 1001 } 1002 } 1003 1004 case AArch64::LDRBBroW: 1005 case AArch64::LDRBBroX: 1006 case AArch64::LDRBroW: 1007 case AArch64::LDRBroX: 1008 case AArch64::LDRDroW: 1009 case AArch64::LDRDroX: 1010 case AArch64::LDRHHroW: 1011 case AArch64::LDRHHroX: 1012 case AArch64::LDRHroW: 1013 case AArch64::LDRHroX: 1014 case AArch64::LDRQroW: 1015 case AArch64::LDRQroX: 1016 case AArch64::LDRSBWroW: 1017 case AArch64::LDRSBWroX: 1018 case AArch64::LDRSBXroW: 1019 case AArch64::LDRSBXroX: 1020 case AArch64::LDRSHWroW: 1021 case AArch64::LDRSHWroX: 1022 case AArch64::LDRSHXroW: 1023 case AArch64::LDRSHXroX: 1024 case AArch64::LDRSWroW: 1025 case AArch64::LDRSWroX: 1026 case AArch64::LDRSroW: 1027 case AArch64::LDRSroX: 1028 case AArch64::LDRWroW: 1029 case AArch64::LDRWroX: 1030 case AArch64::LDRXroW: 1031 case AArch64::LDRXroX: 1032 case AArch64::PRFMroW: 1033 case AArch64::PRFMroX: 1034 case AArch64::STRBBroW: 1035 case AArch64::STRBBroX: 1036 case AArch64::STRBroW: 1037 case AArch64::STRBroX: 1038 case AArch64::STRDroW: 1039 case AArch64::STRDroX: 1040 case AArch64::STRHHroW: 1041 case AArch64::STRHHroX: 1042 case AArch64::STRHroW: 1043 case AArch64::STRHroX: 1044 case AArch64::STRQroW: 1045 case AArch64::STRQroX: 1046 case AArch64::STRSroW: 1047 case AArch64::STRSroX: 1048 case AArch64::STRWroW: 1049 case AArch64::STRWroX: 1050 case AArch64::STRXroW: 1051 case AArch64::STRXroX: { 1052 unsigned IsSigned = MI.getOperand(3).getImm(); 1053 return !IsSigned; 1054 } 1055 } 1056 } 1057 1058 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 1059 unsigned &SrcReg, unsigned &DstReg, 1060 unsigned &SubIdx) const { 1061 switch (MI.getOpcode()) { 1062 default: 1063 return false; 1064 case 
AArch64::SBFMXri: // aka sxtw 1065 case AArch64::UBFMXri: // aka uxtw 1066 // Check for the 32 -> 64 bit extension case, these instructions can do 1067 // much more. 1068 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) 1069 return false; 1070 // This is a signed or unsigned 32 -> 64 bit extension. 1071 SrcReg = MI.getOperand(1).getReg(); 1072 DstReg = MI.getOperand(0).getReg(); 1073 SubIdx = AArch64::sub_32; 1074 return true; 1075 } 1076 } 1077 1078 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( 1079 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { 1080 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1081 unsigned BaseRegA = 0, BaseRegB = 0; 1082 int64_t OffsetA = 0, OffsetB = 0; 1083 unsigned WidthA = 0, WidthB = 0; 1084 1085 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 1086 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 1087 1088 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 1089 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 1090 return false; 1091 1092 // Retrieve the base register, offset from the base register and width. Width 1093 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If 1094 // base registers are identical, and the offset of a lower memory access + 1095 // the width doesn't overlap the offset of a higher memory access, 1096 // then the memory accesses are different. 1097 if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && 1098 getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { 1099 if (BaseRegA == BaseRegB) { 1100 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 1101 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 1102 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 1103 if (LowOffset + LowWidth <= HighOffset) 1104 return true; 1105 } 1106 } 1107 return false; 1108 } 1109 1110 /// analyzeCompare - For a comparison instruction, return the source registers 1111 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. 1112 /// Return true if the comparison instruction can be analyzed. 1113 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 1114 unsigned &SrcReg2, int &CmpMask, 1115 int &CmpValue) const { 1116 // The first operand can be a frame index where we'd normally expect a 1117 // register. 1118 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands"); 1119 if (!MI.getOperand(1).isReg()) 1120 return false; 1121 1122 switch (MI.getOpcode()) { 1123 default: 1124 break; 1125 case AArch64::SUBSWrr: 1126 case AArch64::SUBSWrs: 1127 case AArch64::SUBSWrx: 1128 case AArch64::SUBSXrr: 1129 case AArch64::SUBSXrs: 1130 case AArch64::SUBSXrx: 1131 case AArch64::ADDSWrr: 1132 case AArch64::ADDSWrs: 1133 case AArch64::ADDSWrx: 1134 case AArch64::ADDSXrr: 1135 case AArch64::ADDSXrs: 1136 case AArch64::ADDSXrx: 1137 // Replace SUBSWrr with SUBWrr if NZCV is not used. 
1138 SrcReg = MI.getOperand(1).getReg(); 1139 SrcReg2 = MI.getOperand(2).getReg(); 1140 CmpMask = ~0; 1141 CmpValue = 0; 1142 return true; 1143 case AArch64::SUBSWri: 1144 case AArch64::ADDSWri: 1145 case AArch64::SUBSXri: 1146 case AArch64::ADDSXri: 1147 SrcReg = MI.getOperand(1).getReg(); 1148 SrcReg2 = 0; 1149 CmpMask = ~0; 1150 // FIXME: In order to convert CmpValue to 0 or 1 1151 CmpValue = MI.getOperand(2).getImm() != 0; 1152 return true; 1153 case AArch64::ANDSWri: 1154 case AArch64::ANDSXri: 1155 // ANDS does not use the same encoding scheme as the others xxxS 1156 // instructions. 1157 SrcReg = MI.getOperand(1).getReg(); 1158 SrcReg2 = 0; 1159 CmpMask = ~0; 1160 // FIXME:The return val type of decodeLogicalImmediate is uint64_t, 1161 // while the type of CmpValue is int. When converting uint64_t to int, 1162 // the high 32 bits of uint64_t will be lost. 1163 // In fact it causes a bug in spec2006-483.xalancbmk 1164 // CmpValue is only used to compare with zero in OptimizeCompareInstr 1165 CmpValue = AArch64_AM::decodeLogicalImmediate( 1166 MI.getOperand(2).getImm(), 1167 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; 1168 return true; 1169 } 1170 1171 return false; 1172 } 1173 1174 static bool UpdateOperandRegClass(MachineInstr &Instr) { 1175 MachineBasicBlock *MBB = Instr.getParent(); 1176 assert(MBB && "Can't get MachineBasicBlock here"); 1177 MachineFunction *MF = MBB->getParent(); 1178 assert(MF && "Can't get MachineFunction here"); 1179 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1180 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 1181 MachineRegisterInfo *MRI = &MF->getRegInfo(); 1182 1183 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx; 1184 ++OpIdx) { 1185 MachineOperand &MO = Instr.getOperand(OpIdx); 1186 const TargetRegisterClass *OpRegCstraints = 1187 Instr.getRegClassConstraint(OpIdx, TII, TRI); 1188 1189 // If there's no constraint, there's nothing to do. 1190 if (!OpRegCstraints) 1191 continue; 1192 // If the operand is a frame index, there's nothing to do here. 1193 // A frame index operand will resolve correctly during PEI. 1194 if (MO.isFI()) 1195 continue; 1196 1197 assert(MO.isReg() && 1198 "Operand has register constraints without being a register!"); 1199 1200 unsigned Reg = MO.getReg(); 1201 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 1202 if (!OpRegCstraints->contains(Reg)) 1203 return false; 1204 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && 1205 !MRI->constrainRegClass(Reg, OpRegCstraints)) 1206 return false; 1207 } 1208 1209 return true; 1210 } 1211 1212 /// Return the opcode that does not set flags when possible - otherwise 1213 /// return the original opcode. The caller is responsible to do the actual 1214 /// substitution and legality checking. 1215 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { 1216 // Don't convert all compare instructions, because for some the zero register 1217 // encoding becomes the sp register. 1218 bool MIDefinesZeroReg = false; 1219 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR)) 1220 MIDefinesZeroReg = true; 1221 1222 switch (MI.getOpcode()) { 1223 default: 1224 return MI.getOpcode(); 1225 case AArch64::ADDSWrr: 1226 return AArch64::ADDWrr; 1227 case AArch64::ADDSWri: 1228 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri; 1229 case AArch64::ADDSWrs: 1230 return MIDefinesZeroReg ? 
                              AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be treated as a pure
/// compare instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
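  // For example, if the flags defined by an illustrative "subs w0, w1, w2" are
  // dead, the instruction can be rewritten to the non-flag-setting
  // "sub w0, w1, w2" via convertToNonFlagSettingOpc() above.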
1315 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true); 1316 if (DeadNZCVIdx != -1) { 1317 if (CmpInstr.definesRegister(AArch64::WZR) || 1318 CmpInstr.definesRegister(AArch64::XZR)) { 1319 CmpInstr.eraseFromParent(); 1320 return true; 1321 } 1322 unsigned Opc = CmpInstr.getOpcode(); 1323 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); 1324 if (NewOpc == Opc) 1325 return false; 1326 const MCInstrDesc &MCID = get(NewOpc); 1327 CmpInstr.setDesc(MCID); 1328 CmpInstr.RemoveOperand(DeadNZCVIdx); 1329 bool succeeded = UpdateOperandRegClass(CmpInstr); 1330 (void)succeeded; 1331 assert(succeeded && "Some operands reg class are incompatible!"); 1332 return true; 1333 } 1334 1335 // Continue only if we have a "ri" where immediate is zero. 1336 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare 1337 // function. 1338 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); 1339 if (CmpValue != 0 || SrcReg2 != 0) 1340 return false; 1341 1342 // CmpInstr is a Compare instruction if destination register is not used. 1343 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) 1344 return false; 1345 1346 return substituteCmpToZero(CmpInstr, SrcReg, MRI); 1347 } 1348 1349 /// Get opcode of S version of Instr. 1350 /// If Instr is S version its opcode is returned. 1351 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version 1352 /// or we are not interested in it. 1353 static unsigned sForm(MachineInstr &Instr) { 1354 switch (Instr.getOpcode()) { 1355 default: 1356 return AArch64::INSTRUCTION_LIST_END; 1357 1358 case AArch64::ADDSWrr: 1359 case AArch64::ADDSWri: 1360 case AArch64::ADDSXrr: 1361 case AArch64::ADDSXri: 1362 case AArch64::SUBSWrr: 1363 case AArch64::SUBSWri: 1364 case AArch64::SUBSXrr: 1365 case AArch64::SUBSXri: 1366 return Instr.getOpcode(); 1367 1368 case AArch64::ADDWrr: 1369 return AArch64::ADDSWrr; 1370 case AArch64::ADDWri: 1371 return AArch64::ADDSWri; 1372 case AArch64::ADDXrr: 1373 return AArch64::ADDSXrr; 1374 case AArch64::ADDXri: 1375 return AArch64::ADDSXri; 1376 case AArch64::ADCWr: 1377 return AArch64::ADCSWr; 1378 case AArch64::ADCXr: 1379 return AArch64::ADCSXr; 1380 case AArch64::SUBWrr: 1381 return AArch64::SUBSWrr; 1382 case AArch64::SUBWri: 1383 return AArch64::SUBSWri; 1384 case AArch64::SUBXrr: 1385 return AArch64::SUBSXrr; 1386 case AArch64::SUBXri: 1387 return AArch64::SUBSXri; 1388 case AArch64::SBCWr: 1389 return AArch64::SBCSWr; 1390 case AArch64::SBCXr: 1391 return AArch64::SBCSXr; 1392 case AArch64::ANDWri: 1393 return AArch64::ANDSWri; 1394 case AArch64::ANDXri: 1395 return AArch64::ANDSXri; 1396 } 1397 } 1398 1399 /// Check if AArch64::NZCV should be alive in successors of MBB. 1400 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) { 1401 for (auto *BB : MBB->successors()) 1402 if (BB->isLiveIn(AArch64::NZCV)) 1403 return true; 1404 return false; 1405 } 1406 1407 namespace { 1408 1409 struct UsedNZCV { 1410 bool N = false; 1411 bool Z = false; 1412 bool C = false; 1413 bool V = false; 1414 1415 UsedNZCV() = default; 1416 1417 UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { 1418 this->N |= UsedFlags.N; 1419 this->Z |= UsedFlags.Z; 1420 this->C |= UsedFlags.C; 1421 this->V |= UsedFlags.V; 1422 return *this; 1423 } 1424 }; 1425 1426 } // end anonymous namespace 1427 1428 /// Find a condition code used by the instruction. 
1429 /// Returns AArch64CC::Invalid if either the instruction does not use condition 1430 /// codes or we don't optimize CmpInstr in the presence of such instructions. 1431 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { 1432 switch (Instr.getOpcode()) { 1433 default: 1434 return AArch64CC::Invalid; 1435 1436 case AArch64::Bcc: { 1437 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1438 assert(Idx >= 2); 1439 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm()); 1440 } 1441 1442 case AArch64::CSINVWr: 1443 case AArch64::CSINVXr: 1444 case AArch64::CSINCWr: 1445 case AArch64::CSINCXr: 1446 case AArch64::CSELWr: 1447 case AArch64::CSELXr: 1448 case AArch64::CSNEGWr: 1449 case AArch64::CSNEGXr: 1450 case AArch64::FCSELSrrr: 1451 case AArch64::FCSELDrrr: { 1452 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1453 assert(Idx >= 1); 1454 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm()); 1455 } 1456 } 1457 } 1458 1459 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { 1460 assert(CC != AArch64CC::Invalid); 1461 UsedNZCV UsedFlags; 1462 switch (CC) { 1463 default: 1464 break; 1465 1466 case AArch64CC::EQ: // Z set 1467 case AArch64CC::NE: // Z clear 1468 UsedFlags.Z = true; 1469 break; 1470 1471 case AArch64CC::HI: // Z clear and C set 1472 case AArch64CC::LS: // Z set or C clear 1473 UsedFlags.Z = true; 1474 LLVM_FALLTHROUGH; 1475 case AArch64CC::HS: // C set 1476 case AArch64CC::LO: // C clear 1477 UsedFlags.C = true; 1478 break; 1479 1480 case AArch64CC::MI: // N set 1481 case AArch64CC::PL: // N clear 1482 UsedFlags.N = true; 1483 break; 1484 1485 case AArch64CC::VS: // V set 1486 case AArch64CC::VC: // V clear 1487 UsedFlags.V = true; 1488 break; 1489 1490 case AArch64CC::GT: // Z clear, N and V the same 1491 case AArch64CC::LE: // Z set, N and V differ 1492 UsedFlags.Z = true; 1493 LLVM_FALLTHROUGH; 1494 case AArch64CC::GE: // N and V the same 1495 case AArch64CC::LT: // N and V differ 1496 UsedFlags.N = true; 1497 UsedFlags.V = true; 1498 break; 1499 } 1500 return UsedFlags; 1501 } 1502 1503 static bool isADDSRegImm(unsigned Opcode) { 1504 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri; 1505 } 1506 1507 static bool isSUBSRegImm(unsigned Opcode) { 1508 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; 1509 } 1510 1511 /// Check if CmpInstr can be substituted by MI. 1512 /// 1513 /// CmpInstr can be substituted: 1514 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0' 1515 /// - and, MI and CmpInstr are from the same MachineBB 1516 /// - and, condition flags are not alive in successors of the CmpInstr parent 1517 /// - and, if MI opcode is the S form there must be no defs of flags between 1518 /// MI and CmpInstr 1519 /// or if MI opcode is not the S form there must be neither defs of flags 1520 /// nor uses of flags between MI and CmpInstr. 
1521 /// - and C/V flags are not used after CmpInstr 1522 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, 1523 const TargetRegisterInfo *TRI) { 1524 assert(MI); 1525 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END); 1526 assert(CmpInstr); 1527 1528 const unsigned CmpOpcode = CmpInstr->getOpcode(); 1529 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode)) 1530 return false; 1531 1532 if (MI->getParent() != CmpInstr->getParent()) 1533 return false; 1534 1535 if (areCFlagsAliveInSuccessors(CmpInstr->getParent())) 1536 return false; 1537 1538 AccessKind AccessToCheck = AK_Write; 1539 if (sForm(*MI) != MI->getOpcode()) 1540 AccessToCheck = AK_All; 1541 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck)) 1542 return false; 1543 1544 UsedNZCV NZCVUsedAfterCmp; 1545 for (auto I = std::next(CmpInstr->getIterator()), 1546 E = CmpInstr->getParent()->instr_end(); 1547 I != E; ++I) { 1548 const MachineInstr &Instr = *I; 1549 if (Instr.readsRegister(AArch64::NZCV, TRI)) { 1550 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr); 1551 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction 1552 return false; 1553 NZCVUsedAfterCmp |= getUsedNZCV(CC); 1554 } 1555 1556 if (Instr.modifiesRegister(AArch64::NZCV, TRI)) 1557 break; 1558 } 1559 1560 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V; 1561 } 1562 1563 /// Substitute an instruction comparing to zero with another instruction 1564 /// which produces needed condition flags. 1565 /// 1566 /// Return true on success. 1567 bool AArch64InstrInfo::substituteCmpToZero( 1568 MachineInstr &CmpInstr, unsigned SrcReg, 1569 const MachineRegisterInfo *MRI) const { 1570 assert(MRI); 1571 // Get the unique definition of SrcReg. 1572 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 1573 if (!MI) 1574 return false; 1575 1576 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1577 1578 unsigned NewOpc = sForm(*MI); 1579 if (NewOpc == AArch64::INSTRUCTION_LIST_END) 1580 return false; 1581 1582 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI)) 1583 return false; 1584 1585 // Update the instruction to set NZCV. 
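  // For example, an illustrative "%1 = ADDWrr %a, %b" followed by
  // "SUBSWri %1, 0" becomes "%1 = ADDSWrr %a, %b" and the compare is erased,
  // provided the checks in canInstrSubstituteCmpInstr() above passed.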
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a non-zero immediate.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has a non-zero immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
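// For example, a COPY between FPR64/FPR128 registers, or an illustrative
// "orr v0.16b, v1.16b, v1.16b", only renames the value.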
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
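/// For example, illustrative "ldr x0, [x1, x2, lsl #3]" and
/// "ldr x0, [x1, w2, sxtw]" are scaled/extended register-offset accesses,
/// while "ldr x0, [x1, x2]" is not.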
1840 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) { 1841 switch (MI.getOpcode()) { 1842 default: 1843 break; 1844 case AArch64::LDRBBroW: 1845 case AArch64::LDRBroW: 1846 case AArch64::LDRDroW: 1847 case AArch64::LDRHHroW: 1848 case AArch64::LDRHroW: 1849 case AArch64::LDRQroW: 1850 case AArch64::LDRSBWroW: 1851 case AArch64::LDRSBXroW: 1852 case AArch64::LDRSHWroW: 1853 case AArch64::LDRSHXroW: 1854 case AArch64::LDRSWroW: 1855 case AArch64::LDRSroW: 1856 case AArch64::LDRWroW: 1857 case AArch64::LDRXroW: 1858 case AArch64::STRBBroW: 1859 case AArch64::STRBroW: 1860 case AArch64::STRDroW: 1861 case AArch64::STRHHroW: 1862 case AArch64::STRHroW: 1863 case AArch64::STRQroW: 1864 case AArch64::STRSroW: 1865 case AArch64::STRWroW: 1866 case AArch64::STRXroW: 1867 case AArch64::LDRBBroX: 1868 case AArch64::LDRBroX: 1869 case AArch64::LDRDroX: 1870 case AArch64::LDRHHroX: 1871 case AArch64::LDRHroX: 1872 case AArch64::LDRQroX: 1873 case AArch64::LDRSBWroX: 1874 case AArch64::LDRSBXroX: 1875 case AArch64::LDRSHWroX: 1876 case AArch64::LDRSHXroX: 1877 case AArch64::LDRSWroX: 1878 case AArch64::LDRSroX: 1879 case AArch64::LDRWroX: 1880 case AArch64::LDRXroX: 1881 case AArch64::STRBBroX: 1882 case AArch64::STRBroX: 1883 case AArch64::STRDroX: 1884 case AArch64::STRHHroX: 1885 case AArch64::STRHroX: 1886 case AArch64::STRQroX: 1887 case AArch64::STRSroX: 1888 case AArch64::STRWroX: 1889 case AArch64::STRXroX: 1890 1891 unsigned Val = MI.getOperand(3).getImm(); 1892 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val); 1893 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val); 1894 } 1895 return false; 1896 } 1897 1898 /// Check all MachineMemOperands for a hint to suppress pairing. 1899 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) { 1900 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1901 return MMO->getFlags() & MOSuppressPair; 1902 }); 1903 } 1904 1905 /// Set a flag on the first MachineMemOperand to suppress pairing. 1906 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) { 1907 if (MI.memoperands_empty()) 1908 return; 1909 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 1910 } 1911 1912 /// Check all MachineMemOperands for a hint that the load/store is strided. 1913 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 1914 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1915 return MMO->getFlags() & MOStridedAccess; 1916 }); 1917 } 1918 1919 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) { 1920 switch (Opc) { 1921 default: 1922 return false; 1923 case AArch64::STURSi: 1924 case AArch64::STURDi: 1925 case AArch64::STURQi: 1926 case AArch64::STURBBi: 1927 case AArch64::STURHHi: 1928 case AArch64::STURWi: 1929 case AArch64::STURXi: 1930 case AArch64::LDURSi: 1931 case AArch64::LDURDi: 1932 case AArch64::LDURQi: 1933 case AArch64::LDURWi: 1934 case AArch64::LDURXi: 1935 case AArch64::LDURSWi: 1936 case AArch64::LDURHHi: 1937 case AArch64::LDURBBi: 1938 case AArch64::LDURSBWi: 1939 case AArch64::LDURSHWi: 1940 return true; 1941 } 1942 } 1943 1944 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { 1945 switch (MI.getOpcode()) { 1946 default: 1947 return false; 1948 // Scaled instructions. 
1949 case AArch64::STRSui: 1950 case AArch64::STRDui: 1951 case AArch64::STRQui: 1952 case AArch64::STRXui: 1953 case AArch64::STRWui: 1954 case AArch64::LDRSui: 1955 case AArch64::LDRDui: 1956 case AArch64::LDRQui: 1957 case AArch64::LDRXui: 1958 case AArch64::LDRWui: 1959 case AArch64::LDRSWui: 1960 // Unscaled instructions. 1961 case AArch64::STURSi: 1962 case AArch64::STURDi: 1963 case AArch64::STURQi: 1964 case AArch64::STURWi: 1965 case AArch64::STURXi: 1966 case AArch64::LDURSi: 1967 case AArch64::LDURDi: 1968 case AArch64::LDURQi: 1969 case AArch64::LDURWi: 1970 case AArch64::LDURXi: 1971 case AArch64::LDURSWi: 1972 return true; 1973 } 1974 } 1975 1976 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, 1977 bool &Is64Bit) { 1978 switch (Opc) { 1979 default: 1980 llvm_unreachable("Opcode has no flag setting equivalent!"); 1981 // 32-bit cases: 1982 case AArch64::ADDWri: 1983 Is64Bit = false; 1984 return AArch64::ADDSWri; 1985 case AArch64::ADDWrr: 1986 Is64Bit = false; 1987 return AArch64::ADDSWrr; 1988 case AArch64::ADDWrs: 1989 Is64Bit = false; 1990 return AArch64::ADDSWrs; 1991 case AArch64::ADDWrx: 1992 Is64Bit = false; 1993 return AArch64::ADDSWrx; 1994 case AArch64::ANDWri: 1995 Is64Bit = false; 1996 return AArch64::ANDSWri; 1997 case AArch64::ANDWrr: 1998 Is64Bit = false; 1999 return AArch64::ANDSWrr; 2000 case AArch64::ANDWrs: 2001 Is64Bit = false; 2002 return AArch64::ANDSWrs; 2003 case AArch64::BICWrr: 2004 Is64Bit = false; 2005 return AArch64::BICSWrr; 2006 case AArch64::BICWrs: 2007 Is64Bit = false; 2008 return AArch64::BICSWrs; 2009 case AArch64::SUBWri: 2010 Is64Bit = false; 2011 return AArch64::SUBSWri; 2012 case AArch64::SUBWrr: 2013 Is64Bit = false; 2014 return AArch64::SUBSWrr; 2015 case AArch64::SUBWrs: 2016 Is64Bit = false; 2017 return AArch64::SUBSWrs; 2018 case AArch64::SUBWrx: 2019 Is64Bit = false; 2020 return AArch64::SUBSWrx; 2021 // 64-bit cases: 2022 case AArch64::ADDXri: 2023 Is64Bit = true; 2024 return AArch64::ADDSXri; 2025 case AArch64::ADDXrr: 2026 Is64Bit = true; 2027 return AArch64::ADDSXrr; 2028 case AArch64::ADDXrs: 2029 Is64Bit = true; 2030 return AArch64::ADDSXrs; 2031 case AArch64::ADDXrx: 2032 Is64Bit = true; 2033 return AArch64::ADDSXrx; 2034 case AArch64::ANDXri: 2035 Is64Bit = true; 2036 return AArch64::ANDSXri; 2037 case AArch64::ANDXrr: 2038 Is64Bit = true; 2039 return AArch64::ANDSXrr; 2040 case AArch64::ANDXrs: 2041 Is64Bit = true; 2042 return AArch64::ANDSXrs; 2043 case AArch64::BICXrr: 2044 Is64Bit = true; 2045 return AArch64::BICSXrr; 2046 case AArch64::BICXrs: 2047 Is64Bit = true; 2048 return AArch64::BICSXrs; 2049 case AArch64::SUBXri: 2050 Is64Bit = true; 2051 return AArch64::SUBSXri; 2052 case AArch64::SUBXrr: 2053 Is64Bit = true; 2054 return AArch64::SUBSXrr; 2055 case AArch64::SUBXrs: 2056 Is64Bit = true; 2057 return AArch64::SUBSXrs; 2058 case AArch64::SUBXrx: 2059 Is64Bit = true; 2060 return AArch64::SUBSXrx; 2061 } 2062 } 2063 2064 // Is this a candidate for ld/st merging or pairing? For example, we don't 2065 // touch volatiles or load/stores that have a hint to avoid pair formation. 2066 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const { 2067 // If this is a volatile load/store, don't mess with it. 2068 if (MI.hasOrderedMemoryRef()) 2069 return false; 2070 2071 // Make sure this is a reg+imm (as opposed to an address reloc). 
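  // For example, "ldr x0, [x1, #16]" has a plain immediate offset, whereas a
  // form such as "ldr x0, [x1, :lo12:sym]" carries a relocated symbol in the
  // offset operand and is rejected by the isImm() check below.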
2072 assert(MI.getOperand(1).isReg() && "Expected a reg operand."); 2073 if (!MI.getOperand(2).isImm()) 2074 return false; 2075 2076 // Can't merge/pair if the instruction modifies the base register. 2077 // e.g., ldr x0, [x0] 2078 unsigned BaseReg = MI.getOperand(1).getReg(); 2079 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2080 if (MI.modifiesRegister(BaseReg, TRI)) 2081 return false; 2082 2083 // Check if this load/store has a hint to avoid pair formation. 2084 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 2085 if (isLdStPairSuppressed(MI)) 2086 return false; 2087 2088 // On some CPUs quad load/store pairs are slower than two single load/stores. 2089 if (Subtarget.isPaired128Slow()) { 2090 switch (MI.getOpcode()) { 2091 default: 2092 break; 2093 case AArch64::LDURQi: 2094 case AArch64::STURQi: 2095 case AArch64::LDRQui: 2096 case AArch64::STRQui: 2097 return false; 2098 } 2099 } 2100 2101 return true; 2102 } 2103 2104 bool AArch64InstrInfo::getMemOpBaseRegImmOfs( 2105 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, 2106 const TargetRegisterInfo *TRI) const { 2107 unsigned Width; 2108 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI); 2109 } 2110 2111 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( 2112 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width, 2113 const TargetRegisterInfo *TRI) const { 2114 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2115 // Handle only loads/stores with base register followed by immediate offset. 2116 if (LdSt.getNumExplicitOperands() == 3) { 2117 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 2118 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm()) 2119 return false; 2120 } else if (LdSt.getNumExplicitOperands() == 4) { 2121 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 2122 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() || 2123 !LdSt.getOperand(3).isImm()) 2124 return false; 2125 } else 2126 return false; 2127 2128 // Get the scaling factor for the instruction and set the width for the 2129 // instruction. 2130 unsigned Scale = 0; 2131 int64_t Dummy1, Dummy2; 2132 2133 // If this returns false, then it's an instruction we don't want to handle. 2134 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) 2135 return false; 2136 2137 // Compute the offset. Offset is calculated as the immediate operand 2138 // multiplied by the scaling factor. Unscaled instructions have scaling factor 2139 // set to 1. 2140 if (LdSt.getNumExplicitOperands() == 3) { 2141 BaseReg = LdSt.getOperand(1).getReg(); 2142 Offset = LdSt.getOperand(2).getImm() * Scale; 2143 } else { 2144 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 2145 BaseReg = LdSt.getOperand(2).getReg(); 2146 Offset = LdSt.getOperand(3).getImm() * Scale; 2147 } 2148 return true; 2149 } 2150 2151 MachineOperand & 2152 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { 2153 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2154 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); 2155 assert(OfsOp.isImm() && "Offset operand wasn't immediate."); 2156 return OfsOp; 2157 } 2158 2159 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, 2160 unsigned &Width, int64_t &MinOffset, 2161 int64_t &MaxOffset) const { 2162 switch (Opcode) { 2163 // Not a memory operation or something we want to handle. 
2164 default: 2165 Scale = Width = 0; 2166 MinOffset = MaxOffset = 0; 2167 return false; 2168 case AArch64::STRWpost: 2169 case AArch64::LDRWpost: 2170 Width = 32; 2171 Scale = 4; 2172 MinOffset = -256; 2173 MaxOffset = 255; 2174 break; 2175 case AArch64::LDURQi: 2176 case AArch64::STURQi: 2177 Width = 16; 2178 Scale = 1; 2179 MinOffset = -256; 2180 MaxOffset = 255; 2181 break; 2182 case AArch64::LDURXi: 2183 case AArch64::LDURDi: 2184 case AArch64::STURXi: 2185 case AArch64::STURDi: 2186 Width = 8; 2187 Scale = 1; 2188 MinOffset = -256; 2189 MaxOffset = 255; 2190 break; 2191 case AArch64::LDURWi: 2192 case AArch64::LDURSi: 2193 case AArch64::LDURSWi: 2194 case AArch64::STURWi: 2195 case AArch64::STURSi: 2196 Width = 4; 2197 Scale = 1; 2198 MinOffset = -256; 2199 MaxOffset = 255; 2200 break; 2201 case AArch64::LDURHi: 2202 case AArch64::LDURHHi: 2203 case AArch64::LDURSHXi: 2204 case AArch64::LDURSHWi: 2205 case AArch64::STURHi: 2206 case AArch64::STURHHi: 2207 Width = 2; 2208 Scale = 1; 2209 MinOffset = -256; 2210 MaxOffset = 255; 2211 break; 2212 case AArch64::LDURBi: 2213 case AArch64::LDURBBi: 2214 case AArch64::LDURSBXi: 2215 case AArch64::LDURSBWi: 2216 case AArch64::STURBi: 2217 case AArch64::STURBBi: 2218 Width = 1; 2219 Scale = 1; 2220 MinOffset = -256; 2221 MaxOffset = 255; 2222 break; 2223 case AArch64::LDPQi: 2224 case AArch64::LDNPQi: 2225 case AArch64::STPQi: 2226 case AArch64::STNPQi: 2227 Scale = 16; 2228 Width = 32; 2229 MinOffset = -64; 2230 MaxOffset = 63; 2231 break; 2232 case AArch64::LDRQui: 2233 case AArch64::STRQui: 2234 Scale = Width = 16; 2235 MinOffset = 0; 2236 MaxOffset = 4095; 2237 break; 2238 case AArch64::LDPXi: 2239 case AArch64::LDPDi: 2240 case AArch64::LDNPXi: 2241 case AArch64::LDNPDi: 2242 case AArch64::STPXi: 2243 case AArch64::STPDi: 2244 case AArch64::STNPXi: 2245 case AArch64::STNPDi: 2246 Scale = 8; 2247 Width = 16; 2248 MinOffset = -64; 2249 MaxOffset = 63; 2250 break; 2251 case AArch64::LDRXui: 2252 case AArch64::LDRDui: 2253 case AArch64::STRXui: 2254 case AArch64::STRDui: 2255 Scale = Width = 8; 2256 MinOffset = 0; 2257 MaxOffset = 4095; 2258 break; 2259 case AArch64::LDPWi: 2260 case AArch64::LDPSi: 2261 case AArch64::LDNPWi: 2262 case AArch64::LDNPSi: 2263 case AArch64::STPWi: 2264 case AArch64::STPSi: 2265 case AArch64::STNPWi: 2266 case AArch64::STNPSi: 2267 Scale = 4; 2268 Width = 8; 2269 MinOffset = -64; 2270 MaxOffset = 63; 2271 break; 2272 case AArch64::LDRWui: 2273 case AArch64::LDRSui: 2274 case AArch64::LDRSWui: 2275 case AArch64::STRWui: 2276 case AArch64::STRSui: 2277 Scale = Width = 4; 2278 MinOffset = 0; 2279 MaxOffset = 4095; 2280 break; 2281 case AArch64::LDRHui: 2282 case AArch64::LDRHHui: 2283 case AArch64::STRHui: 2284 case AArch64::STRHHui: 2285 Scale = Width = 2; 2286 MinOffset = 0; 2287 MaxOffset = 4095; 2288 break; 2289 case AArch64::LDRBui: 2290 case AArch64::LDRBBui: 2291 case AArch64::STRBui: 2292 case AArch64::STRBBui: 2293 Scale = Width = 1; 2294 MinOffset = 0; 2295 MaxOffset = 4095; 2296 break; 2297 } 2298 2299 return true; 2300 } 2301 2302 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 2303 // scaled. 
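// For example, with LDURXi (stride 8) a byte offset of 24 becomes an element
// offset of 3, while a byte offset of 20 cannot be scaled because it is not a
// multiple of the stride.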
2304 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 2305 unsigned OffsetStride = 1; 2306 switch (Opc) { 2307 default: 2308 return false; 2309 case AArch64::LDURQi: 2310 case AArch64::STURQi: 2311 OffsetStride = 16; 2312 break; 2313 case AArch64::LDURXi: 2314 case AArch64::LDURDi: 2315 case AArch64::STURXi: 2316 case AArch64::STURDi: 2317 OffsetStride = 8; 2318 break; 2319 case AArch64::LDURWi: 2320 case AArch64::LDURSi: 2321 case AArch64::LDURSWi: 2322 case AArch64::STURWi: 2323 case AArch64::STURSi: 2324 OffsetStride = 4; 2325 break; 2326 } 2327 // If the byte-offset isn't a multiple of the stride, we can't scale this 2328 // offset. 2329 if (Offset % OffsetStride != 0) 2330 return false; 2331 2332 // Convert the byte-offset used by unscaled into an "element" offset used 2333 // by the scaled pair load/store instructions. 2334 Offset /= OffsetStride; 2335 return true; 2336 } 2337 2338 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 2339 if (FirstOpc == SecondOpc) 2340 return true; 2341 // We can also pair sign-ext and zero-ext instructions. 2342 switch (FirstOpc) { 2343 default: 2344 return false; 2345 case AArch64::LDRWui: 2346 case AArch64::LDURWi: 2347 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 2348 case AArch64::LDRSWui: 2349 case AArch64::LDURSWi: 2350 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 2351 } 2352 // These instructions can't be paired based on their opcodes. 2353 return false; 2354 } 2355 2356 /// Detect opportunities for ldp/stp formation. 2357 /// 2358 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true. 2359 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, 2360 unsigned BaseReg1, 2361 MachineInstr &SecondLdSt, 2362 unsigned BaseReg2, 2363 unsigned NumLoads) const { 2364 if (BaseReg1 != BaseReg2) 2365 return false; 2366 2367 // Only cluster up to a single pair. 2368 if (NumLoads > 1) 2369 return false; 2370 2371 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 2372 return false; 2373 2374 // Can we pair these instructions based on their opcodes? 2375 unsigned FirstOpc = FirstLdSt.getOpcode(); 2376 unsigned SecondOpc = SecondLdSt.getOpcode(); 2377 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 2378 return false; 2379 2380 // Can't merge volatiles or load/stores that have a hint to avoid pair 2381 // formation, for example. 2382 if (!isCandidateToMergeOrPair(FirstLdSt) || 2383 !isCandidateToMergeOrPair(SecondLdSt)) 2384 return false; 2385 2386 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 2387 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 2388 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 2389 return false; 2390 2391 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 2392 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 2393 return false; 2394 2395 // Pairwise instructions have a 7-bit signed offset field. 2396 if (Offset1 > 63 || Offset1 < -64) 2397 return false; 2398 2399 // The caller should already have ordered First/SecondLdSt by offset. 
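  // Only immediately adjacent elements are clustered. For example (registers
  // chosen arbitrarily), "ldr x0, [x2, #8]" followed by "ldr x1, [x2, #16]"
  // has element offsets 1 and 2, so the pair can later become
  // "ldp x0, x1, [x2, #8]".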
2400 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 2401 return Offset1 + 1 == Offset2; 2402 } 2403 2404 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 2405 unsigned Reg, unsigned SubIdx, 2406 unsigned State, 2407 const TargetRegisterInfo *TRI) { 2408 if (!SubIdx) 2409 return MIB.addReg(Reg, State); 2410 2411 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 2412 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 2413 return MIB.addReg(Reg, State, SubIdx); 2414 } 2415 2416 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 2417 unsigned NumRegs) { 2418 // We really want the positive remainder mod 32 here, that happens to be 2419 // easily obtainable with a mask. 2420 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 2421 } 2422 2423 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 2424 MachineBasicBlock::iterator I, 2425 const DebugLoc &DL, unsigned DestReg, 2426 unsigned SrcReg, bool KillSrc, 2427 unsigned Opcode, 2428 ArrayRef<unsigned> Indices) const { 2429 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 2430 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2431 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2432 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2433 unsigned NumRegs = Indices.size(); 2434 2435 int SubReg = 0, End = NumRegs, Incr = 1; 2436 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 2437 SubReg = NumRegs - 1; 2438 End = -1; 2439 Incr = -1; 2440 } 2441 2442 for (; SubReg != End; SubReg += Incr) { 2443 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2444 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2445 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 2446 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2447 } 2448 } 2449 2450 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2451 MachineBasicBlock::iterator I, 2452 const DebugLoc &DL, unsigned DestReg, 2453 unsigned SrcReg, bool KillSrc) const { 2454 if (AArch64::GPR32spRegClass.contains(DestReg) && 2455 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 2456 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2457 2458 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 2459 // If either operand is WSP, expand to ADD #0. 2460 if (Subtarget.hasZeroCycleRegMove()) { 2461 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 2462 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2463 &AArch64::GPR64spRegClass); 2464 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2465 &AArch64::GPR64spRegClass); 2466 // This instruction is reading and writing X registers. This may upset 2467 // the register scavenger and machine verifier, so we need to indicate 2468 // that we are reading an undefined value from SrcRegX, but a proper 2469 // value from SrcReg. 
2470 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 2471 .addReg(SrcRegX, RegState::Undef) 2472 .addImm(0) 2473 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2474 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2475 } else { 2476 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 2477 .addReg(SrcReg, getKillRegState(KillSrc)) 2478 .addImm(0) 2479 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2480 } 2481 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { 2482 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 2483 .addImm(0) 2484 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2485 } else { 2486 if (Subtarget.hasZeroCycleRegMove()) { 2487 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 2488 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2489 &AArch64::GPR64spRegClass); 2490 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2491 &AArch64::GPR64spRegClass); 2492 // This instruction is reading and writing X registers. This may upset 2493 // the register scavenger and machine verifier, so we need to indicate 2494 // that we are reading an undefined value from SrcRegX, but a proper 2495 // value from SrcReg. 2496 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 2497 .addReg(AArch64::XZR) 2498 .addReg(SrcRegX, RegState::Undef) 2499 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2500 } else { 2501 // Otherwise, expand to ORR WZR. 2502 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 2503 .addReg(AArch64::WZR) 2504 .addReg(SrcReg, getKillRegState(KillSrc)); 2505 } 2506 } 2507 return; 2508 } 2509 2510 if (AArch64::GPR64spRegClass.contains(DestReg) && 2511 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 2512 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 2513 // If either operand is SP, expand to ADD #0. 2514 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 2515 .addReg(SrcReg, getKillRegState(KillSrc)) 2516 .addImm(0) 2517 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2518 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { 2519 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 2520 .addImm(0) 2521 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2522 } else { 2523 // Otherwise, expand to ORR XZR. 2524 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 2525 .addReg(AArch64::XZR) 2526 .addReg(SrcReg, getKillRegState(KillSrc)); 2527 } 2528 return; 2529 } 2530 2531 // Copy a DDDD register quad by copying the individual sub-registers. 2532 if (AArch64::DDDDRegClass.contains(DestReg) && 2533 AArch64::DDDDRegClass.contains(SrcReg)) { 2534 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2535 AArch64::dsub2, AArch64::dsub3}; 2536 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2537 Indices); 2538 return; 2539 } 2540 2541 // Copy a DDD register triple by copying the individual sub-registers. 2542 if (AArch64::DDDRegClass.contains(DestReg) && 2543 AArch64::DDDRegClass.contains(SrcReg)) { 2544 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2545 AArch64::dsub2}; 2546 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2547 Indices); 2548 return; 2549 } 2550 2551 // Copy a DD register pair by copying the individual sub-registers. 
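// If the source and destination tuples overlap, copyPhysRegTuple copies the
// sub-registers in reverse order so that no source lane is clobbered before
// it is read; e.g., copying d0_d1 into d1_d2 emits d2<-d1 before d1<-d0.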
2552 if (AArch64::DDRegClass.contains(DestReg) && 2553 AArch64::DDRegClass.contains(SrcReg)) { 2554 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 2555 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2556 Indices); 2557 return; 2558 } 2559 2560 // Copy a QQQQ register quad by copying the individual sub-registers. 2561 if (AArch64::QQQQRegClass.contains(DestReg) && 2562 AArch64::QQQQRegClass.contains(SrcReg)) { 2563 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2564 AArch64::qsub2, AArch64::qsub3}; 2565 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2566 Indices); 2567 return; 2568 } 2569 2570 // Copy a QQQ register triple by copying the individual sub-registers. 2571 if (AArch64::QQQRegClass.contains(DestReg) && 2572 AArch64::QQQRegClass.contains(SrcReg)) { 2573 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2574 AArch64::qsub2}; 2575 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2576 Indices); 2577 return; 2578 } 2579 2580 // Copy a QQ register pair by copying the individual sub-registers. 2581 if (AArch64::QQRegClass.contains(DestReg) && 2582 AArch64::QQRegClass.contains(SrcReg)) { 2583 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 2584 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2585 Indices); 2586 return; 2587 } 2588 2589 if (AArch64::FPR128RegClass.contains(DestReg) && 2590 AArch64::FPR128RegClass.contains(SrcReg)) { 2591 if (Subtarget.hasNEON()) { 2592 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2593 .addReg(SrcReg) 2594 .addReg(SrcReg, getKillRegState(KillSrc)); 2595 } else { 2596 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2597 .addReg(AArch64::SP, RegState::Define) 2598 .addReg(SrcReg, getKillRegState(KillSrc)) 2599 .addReg(AArch64::SP) 2600 .addImm(-16); 2601 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2602 .addReg(AArch64::SP, RegState::Define) 2603 .addReg(DestReg, RegState::Define) 2604 .addReg(AArch64::SP) 2605 .addImm(16); 2606 } 2607 return; 2608 } 2609 2610 if (AArch64::FPR64RegClass.contains(DestReg) && 2611 AArch64::FPR64RegClass.contains(SrcReg)) { 2612 if (Subtarget.hasNEON()) { 2613 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2614 &AArch64::FPR128RegClass); 2615 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2616 &AArch64::FPR128RegClass); 2617 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2618 .addReg(SrcReg) 2619 .addReg(SrcReg, getKillRegState(KillSrc)); 2620 } else { 2621 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2622 .addReg(SrcReg, getKillRegState(KillSrc)); 2623 } 2624 return; 2625 } 2626 2627 if (AArch64::FPR32RegClass.contains(DestReg) && 2628 AArch64::FPR32RegClass.contains(SrcReg)) { 2629 if (Subtarget.hasNEON()) { 2630 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2631 &AArch64::FPR128RegClass); 2632 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2633 &AArch64::FPR128RegClass); 2634 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2635 .addReg(SrcReg) 2636 .addReg(SrcReg, getKillRegState(KillSrc)); 2637 } else { 2638 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2639 .addReg(SrcReg, getKillRegState(KillSrc)); 2640 } 2641 return; 2642 } 2643 2644 if (AArch64::FPR16RegClass.contains(DestReg) && 2645 AArch64::FPR16RegClass.contains(SrcReg)) { 2646 if (Subtarget.hasNEON()) { 2647 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2648 &AArch64::FPR128RegClass); 2649 SrcReg = 
RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2650 &AArch64::FPR128RegClass); 2651 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2652 .addReg(SrcReg) 2653 .addReg(SrcReg, getKillRegState(KillSrc)); 2654 } else { 2655 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2656 &AArch64::FPR32RegClass); 2657 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2658 &AArch64::FPR32RegClass); 2659 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2660 .addReg(SrcReg, getKillRegState(KillSrc)); 2661 } 2662 return; 2663 } 2664 2665 if (AArch64::FPR8RegClass.contains(DestReg) && 2666 AArch64::FPR8RegClass.contains(SrcReg)) { 2667 if (Subtarget.hasNEON()) { 2668 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2669 &AArch64::FPR128RegClass); 2670 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2671 &AArch64::FPR128RegClass); 2672 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2673 .addReg(SrcReg) 2674 .addReg(SrcReg, getKillRegState(KillSrc)); 2675 } else { 2676 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2677 &AArch64::FPR32RegClass); 2678 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2679 &AArch64::FPR32RegClass); 2680 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2681 .addReg(SrcReg, getKillRegState(KillSrc)); 2682 } 2683 return; 2684 } 2685 2686 // Copies between GPR64 and FPR64. 2687 if (AArch64::FPR64RegClass.contains(DestReg) && 2688 AArch64::GPR64RegClass.contains(SrcReg)) { 2689 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2690 .addReg(SrcReg, getKillRegState(KillSrc)); 2691 return; 2692 } 2693 if (AArch64::GPR64RegClass.contains(DestReg) && 2694 AArch64::FPR64RegClass.contains(SrcReg)) { 2695 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2696 .addReg(SrcReg, getKillRegState(KillSrc)); 2697 return; 2698 } 2699 // Copies between GPR32 and FPR32. 
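  // These lower to fmov in both directions, e.g. "fmov s0, w1" (FMOVWSr) and
  // "fmov w1, s0" (FMOVSWr).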
2700 if (AArch64::FPR32RegClass.contains(DestReg) && 2701 AArch64::GPR32RegClass.contains(SrcReg)) { 2702 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2703 .addReg(SrcReg, getKillRegState(KillSrc)); 2704 return; 2705 } 2706 if (AArch64::GPR32RegClass.contains(DestReg) && 2707 AArch64::FPR32RegClass.contains(SrcReg)) { 2708 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2709 .addReg(SrcReg, getKillRegState(KillSrc)); 2710 return; 2711 } 2712 2713 if (DestReg == AArch64::NZCV) { 2714 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2715 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2716 .addImm(AArch64SysReg::NZCV) 2717 .addReg(SrcReg, getKillRegState(KillSrc)) 2718 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 2719 return; 2720 } 2721 2722 if (SrcReg == AArch64::NZCV) { 2723 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 2724 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 2725 .addImm(AArch64SysReg::NZCV) 2726 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 2727 return; 2728 } 2729 2730 llvm_unreachable("unimplemented reg-to-reg copy"); 2731 } 2732 2733 void AArch64InstrInfo::storeRegToStackSlot( 2734 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 2735 bool isKill, int FI, const TargetRegisterClass *RC, 2736 const TargetRegisterInfo *TRI) const { 2737 DebugLoc DL; 2738 if (MBBI != MBB.end()) 2739 DL = MBBI->getDebugLoc(); 2740 MachineFunction &MF = *MBB.getParent(); 2741 MachineFrameInfo &MFI = MF.getFrameInfo(); 2742 unsigned Align = MFI.getObjectAlignment(FI); 2743 2744 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2745 MachineMemOperand *MMO = MF.getMachineMemOperand( 2746 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 2747 unsigned Opc = 0; 2748 bool Offset = true; 2749 switch (TRI->getSpillSize(*RC)) { 2750 case 1: 2751 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2752 Opc = AArch64::STRBui; 2753 break; 2754 case 2: 2755 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2756 Opc = AArch64::STRHui; 2757 break; 2758 case 4: 2759 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2760 Opc = AArch64::STRWui; 2761 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2762 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 2763 else 2764 assert(SrcReg != AArch64::WSP); 2765 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2766 Opc = AArch64::STRSui; 2767 break; 2768 case 8: 2769 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2770 Opc = AArch64::STRXui; 2771 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2772 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2773 else 2774 assert(SrcReg != AArch64::SP); 2775 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 2776 Opc = AArch64::STRDui; 2777 break; 2778 case 16: 2779 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2780 Opc = AArch64::STRQui; 2781 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2782 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2783 Opc = AArch64::ST1Twov1d; 2784 Offset = false; 2785 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 2786 BuildMI(MBB, MBBI, DL, get(AArch64::STPXi)) 2787 .addReg(TRI->getSubReg(SrcReg, AArch64::sube64), 2788 getKillRegState(isKill)) 2789 .addReg(TRI->getSubReg(SrcReg, AArch64::subo64), 2790 getKillRegState(isKill)) 2791 .addFrameIndex(FI) 2792 .addImm(0) 2793 .addMemOperand(MMO); 2794 return; 2795 } 2796 break; 2797 case 24: 2798 if 
(AArch64::DDDRegClass.hasSubClassEq(RC)) { 2799 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2800 Opc = AArch64::ST1Threev1d; 2801 Offset = false; 2802 } 2803 break; 2804 case 32: 2805 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2806 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2807 Opc = AArch64::ST1Fourv1d; 2808 Offset = false; 2809 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2810 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2811 Opc = AArch64::ST1Twov2d; 2812 Offset = false; 2813 } 2814 break; 2815 case 48: 2816 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2817 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2818 Opc = AArch64::ST1Threev2d; 2819 Offset = false; 2820 } 2821 break; 2822 case 64: 2823 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2824 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2825 Opc = AArch64::ST1Fourv2d; 2826 Offset = false; 2827 } 2828 break; 2829 } 2830 assert(Opc && "Unknown register class"); 2831 2832 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) 2833 .addReg(SrcReg, getKillRegState(isKill)) 2834 .addFrameIndex(FI); 2835 2836 if (Offset) 2837 MI.addImm(0); 2838 MI.addMemOperand(MMO); 2839 } 2840 2841 void AArch64InstrInfo::loadRegFromStackSlot( 2842 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 2843 int FI, const TargetRegisterClass *RC, 2844 const TargetRegisterInfo *TRI) const { 2845 DebugLoc DL; 2846 if (MBBI != MBB.end()) 2847 DL = MBBI->getDebugLoc(); 2848 MachineFunction &MF = *MBB.getParent(); 2849 MachineFrameInfo &MFI = MF.getFrameInfo(); 2850 unsigned Align = MFI.getObjectAlignment(FI); 2851 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2852 MachineMemOperand *MMO = MF.getMachineMemOperand( 2853 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 2854 2855 unsigned Opc = 0; 2856 bool Offset = true; 2857 switch (TRI->getSpillSize(*RC)) { 2858 case 1: 2859 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2860 Opc = AArch64::LDRBui; 2861 break; 2862 case 2: 2863 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2864 Opc = AArch64::LDRHui; 2865 break; 2866 case 4: 2867 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2868 Opc = AArch64::LDRWui; 2869 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2870 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 2871 else 2872 assert(DestReg != AArch64::WSP); 2873 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2874 Opc = AArch64::LDRSui; 2875 break; 2876 case 8: 2877 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2878 Opc = AArch64::LDRXui; 2879 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2880 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 2881 else 2882 assert(DestReg != AArch64::SP); 2883 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 2884 Opc = AArch64::LDRDui; 2885 break; 2886 case 16: 2887 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2888 Opc = AArch64::LDRQui; 2889 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2890 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2891 Opc = AArch64::LD1Twov1d; 2892 Offset = false; 2893 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 2894 BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi)) 2895 .addReg(TRI->getSubReg(DestReg, AArch64::sube64), 2896 getDefRegState(true)) 2897 .addReg(TRI->getSubReg(DestReg, AArch64::subo64), 2898 
getDefRegState(true)) 2899 .addFrameIndex(FI) 2900 .addImm(0) 2901 .addMemOperand(MMO); 2902 return; 2903 } 2904 break; 2905 case 24: 2906 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2907 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2908 Opc = AArch64::LD1Threev1d; 2909 Offset = false; 2910 } 2911 break; 2912 case 32: 2913 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2914 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2915 Opc = AArch64::LD1Fourv1d; 2916 Offset = false; 2917 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2918 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2919 Opc = AArch64::LD1Twov2d; 2920 Offset = false; 2921 } 2922 break; 2923 case 48: 2924 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2925 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2926 Opc = AArch64::LD1Threev2d; 2927 Offset = false; 2928 } 2929 break; 2930 case 64: 2931 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2932 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2933 Opc = AArch64::LD1Fourv2d; 2934 Offset = false; 2935 } 2936 break; 2937 } 2938 assert(Opc && "Unknown register class"); 2939 2940 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) 2941 .addReg(DestReg, getDefRegState(true)) 2942 .addFrameIndex(FI); 2943 if (Offset) 2944 MI.addImm(0); 2945 MI.addMemOperand(MMO); 2946 } 2947 2948 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 2949 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 2950 unsigned DestReg, unsigned SrcReg, int Offset, 2951 const TargetInstrInfo *TII, 2952 MachineInstr::MIFlag Flag, bool SetNZCV) { 2953 if (DestReg == SrcReg && Offset == 0) 2954 return; 2955 2956 assert((DestReg != AArch64::SP || Offset % 16 == 0) && 2957 "SP increment/decrement not 16-byte aligned"); 2958 2959 bool isSub = Offset < 0; 2960 if (isSub) 2961 Offset = -Offset; 2962 2963 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 2964 // scratch register. If DestReg is a virtual register, use it as the 2965 // scratch register; otherwise, create a new virtual register (to be 2966 // replaced by the scavenger at the end of PEI). That case can be optimized 2967 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 2968 // register can be loaded with offset%8 and the add/sub can use an extending 2969 // instruction with LSL#3. 2970 // Currently the function handles any offsets but generates a poor sequence 2971 // of code. 2972 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 2973 2974 unsigned Opc; 2975 if (SetNZCV) 2976 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; 2977 else 2978 Opc = isSub ? 
AArch64::SUBXri : AArch64::ADDXri; 2979 const unsigned MaxEncoding = 0xfff; 2980 const unsigned ShiftSize = 12; 2981 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 2982 while (((unsigned)Offset) >= (1 << ShiftSize)) { 2983 unsigned ThisVal; 2984 if (((unsigned)Offset) > MaxEncodableValue) { 2985 ThisVal = MaxEncodableValue; 2986 } else { 2987 ThisVal = Offset & MaxEncodableValue; 2988 } 2989 assert((ThisVal >> ShiftSize) <= MaxEncoding && 2990 "Encoding cannot handle value that big"); 2991 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 2992 .addReg(SrcReg) 2993 .addImm(ThisVal >> ShiftSize) 2994 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) 2995 .setMIFlag(Flag); 2996 2997 SrcReg = DestReg; 2998 Offset -= ThisVal; 2999 if (Offset == 0) 3000 return; 3001 } 3002 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 3003 .addReg(SrcReg) 3004 .addImm(Offset) 3005 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 3006 .setMIFlag(Flag); 3007 } 3008 3009 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 3010 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 3011 MachineBasicBlock::iterator InsertPt, int FrameIndex, 3012 LiveIntervals *LIS) const { 3013 // This is a bit of a hack. Consider this instruction: 3014 // 3015 // %0 = COPY %sp; GPR64all:%0 3016 // 3017 // We explicitly chose GPR64all for the virtual register so such a copy might 3018 // be eliminated by RegisterCoalescer. However, that may not be possible, and 3019 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 3020 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 3021 // 3022 // To prevent that, we are going to constrain the %0 register class here. 3023 // 3024 // <rdar://problem/11522048> 3025 // 3026 if (MI.isFullCopy()) { 3027 unsigned DstReg = MI.getOperand(0).getReg(); 3028 unsigned SrcReg = MI.getOperand(1).getReg(); 3029 if (SrcReg == AArch64::SP && 3030 TargetRegisterInfo::isVirtualRegister(DstReg)) { 3031 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 3032 return nullptr; 3033 } 3034 if (DstReg == AArch64::SP && 3035 TargetRegisterInfo::isVirtualRegister(SrcReg)) { 3036 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3037 return nullptr; 3038 } 3039 } 3040 3041 // Handle the case where a copy is being spilled or filled but the source 3042 // and destination register class don't match. For example: 3043 // 3044 // %0 = COPY %xzr; GPR64common:%0 3045 // 3046 // In this case we can still safely fold away the COPY and generate the 3047 // following spill code: 3048 // 3049 // STRXui %xzr, %stack.0 3050 // 3051 // This also eliminates spilled cross register class COPYs (e.g. between x and 3052 // d regs) of the same size. For example: 3053 // 3054 // %0 = COPY %1; GPR64:%0, FPR64:%1 3055 // 3056 // will be filled as 3057 // 3058 // LDRDui %0, fi<#0> 3059 // 3060 // instead of 3061 // 3062 // LDRXui %Temp, fi<#0> 3063 // %0 = FMOV %Temp 3064 // 3065 if (MI.isCopy() && Ops.size() == 1 && 3066 // Make sure we're only folding the explicit COPY defs/uses. 
3067 (Ops[0] == 0 || Ops[0] == 1)) { 3068 bool IsSpill = Ops[0] == 0; 3069 bool IsFill = !IsSpill; 3070 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3071 const MachineRegisterInfo &MRI = MF.getRegInfo(); 3072 MachineBasicBlock &MBB = *MI.getParent(); 3073 const MachineOperand &DstMO = MI.getOperand(0); 3074 const MachineOperand &SrcMO = MI.getOperand(1); 3075 unsigned DstReg = DstMO.getReg(); 3076 unsigned SrcReg = SrcMO.getReg(); 3077 // This is slightly expensive to compute for physical regs since 3078 // getMinimalPhysRegClass is slow. 3079 auto getRegClass = [&](unsigned Reg) { 3080 return TargetRegisterInfo::isVirtualRegister(Reg) 3081 ? MRI.getRegClass(Reg) 3082 : TRI.getMinimalPhysRegClass(Reg); 3083 }; 3084 3085 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 3086 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 3087 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 3088 "Mismatched register size in non subreg COPY"); 3089 if (IsSpill) 3090 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 3091 getRegClass(SrcReg), &TRI); 3092 else 3093 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 3094 getRegClass(DstReg), &TRI); 3095 return &*--InsertPt; 3096 } 3097 3098 // Handle cases like spilling def of: 3099 // 3100 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 3101 // 3102 // where the physical register source can be widened and stored to the full 3103 // virtual reg destination stack slot, in this case producing: 3104 // 3105 // STRXui %xzr, %stack.0 3106 // 3107 if (IsSpill && DstMO.isUndef() && 3108 TargetRegisterInfo::isPhysicalRegister(SrcReg)) { 3109 assert(SrcMO.getSubReg() == 0 && 3110 "Unexpected subreg on physical register"); 3111 const TargetRegisterClass *SpillRC; 3112 unsigned SpillSubreg; 3113 switch (DstMO.getSubReg()) { 3114 default: 3115 SpillRC = nullptr; 3116 break; 3117 case AArch64::sub_32: 3118 case AArch64::ssub: 3119 if (AArch64::GPR32RegClass.contains(SrcReg)) { 3120 SpillRC = &AArch64::GPR64RegClass; 3121 SpillSubreg = AArch64::sub_32; 3122 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 3123 SpillRC = &AArch64::FPR64RegClass; 3124 SpillSubreg = AArch64::ssub; 3125 } else 3126 SpillRC = nullptr; 3127 break; 3128 case AArch64::dsub: 3129 if (AArch64::FPR64RegClass.contains(SrcReg)) { 3130 SpillRC = &AArch64::FPR128RegClass; 3131 SpillSubreg = AArch64::dsub; 3132 } else 3133 SpillRC = nullptr; 3134 break; 3135 } 3136 3137 if (SpillRC) 3138 if (unsigned WidenedSrcReg = 3139 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 3140 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 3141 FrameIndex, SpillRC, &TRI); 3142 return &*--InsertPt; 3143 } 3144 } 3145 3146 // Handle cases like filling use of: 3147 // 3148 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 3149 // 3150 // where we can load the full virtual reg source stack slot, into the subreg 3151 // destination, in this case producing: 3152 // 3153 // LDRWui %0:sub_32<def,read-undef>, %stack.0 3154 // 3155 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 3156 const TargetRegisterClass *FillRC; 3157 switch (DstMO.getSubReg()) { 3158 default: 3159 FillRC = nullptr; 3160 break; 3161 case AArch64::sub_32: 3162 FillRC = &AArch64::GPR32RegClass; 3163 break; 3164 case AArch64::ssub: 3165 FillRC = &AArch64::FPR32RegClass; 3166 break; 3167 case AArch64::dsub: 3168 FillRC = &AArch64::FPR64RegClass; 3169 break; 3170 } 3171 3172 if (FillRC) { 3173 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 3174 
TRI.getRegSizeInBits(*FillRC) && 3175 "Mismatched regclass size on folded subreg COPY"); 3176 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 3177 MachineInstr &LoadMI = *--InsertPt; 3178 MachineOperand &LoadDst = LoadMI.getOperand(0); 3179 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 3180 LoadDst.setSubReg(DstMO.getSubReg()); 3181 LoadDst.setIsUndef(); 3182 return &LoadMI; 3183 } 3184 } 3185 } 3186 3187 // Cannot fold. 3188 return nullptr; 3189 } 3190 3191 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, 3192 bool *OutUseUnscaledOp, 3193 unsigned *OutUnscaledOp, 3194 int *EmittableOffset) { 3195 int Scale = 1; 3196 bool IsSigned = false; 3197 // The ImmIdx should be changed case by case if it is not 2. 3198 unsigned ImmIdx = 2; 3199 unsigned UnscaledOp = 0; 3200 // Set output values in case of early exit. 3201 if (EmittableOffset) 3202 *EmittableOffset = 0; 3203 if (OutUseUnscaledOp) 3204 *OutUseUnscaledOp = false; 3205 if (OutUnscaledOp) 3206 *OutUnscaledOp = 0; 3207 switch (MI.getOpcode()) { 3208 default: 3209 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); 3210 // Vector spills/fills can't take an immediate offset. 3211 case AArch64::LD1Twov2d: 3212 case AArch64::LD1Threev2d: 3213 case AArch64::LD1Fourv2d: 3214 case AArch64::LD1Twov1d: 3215 case AArch64::LD1Threev1d: 3216 case AArch64::LD1Fourv1d: 3217 case AArch64::ST1Twov2d: 3218 case AArch64::ST1Threev2d: 3219 case AArch64::ST1Fourv2d: 3220 case AArch64::ST1Twov1d: 3221 case AArch64::ST1Threev1d: 3222 case AArch64::ST1Fourv1d: 3223 return AArch64FrameOffsetCannotUpdate; 3224 case AArch64::PRFMui: 3225 Scale = 8; 3226 UnscaledOp = AArch64::PRFUMi; 3227 break; 3228 case AArch64::LDRXui: 3229 Scale = 8; 3230 UnscaledOp = AArch64::LDURXi; 3231 break; 3232 case AArch64::LDRWui: 3233 Scale = 4; 3234 UnscaledOp = AArch64::LDURWi; 3235 break; 3236 case AArch64::LDRBui: 3237 Scale = 1; 3238 UnscaledOp = AArch64::LDURBi; 3239 break; 3240 case AArch64::LDRHui: 3241 Scale = 2; 3242 UnscaledOp = AArch64::LDURHi; 3243 break; 3244 case AArch64::LDRSui: 3245 Scale = 4; 3246 UnscaledOp = AArch64::LDURSi; 3247 break; 3248 case AArch64::LDRDui: 3249 Scale = 8; 3250 UnscaledOp = AArch64::LDURDi; 3251 break; 3252 case AArch64::LDRQui: 3253 Scale = 16; 3254 UnscaledOp = AArch64::LDURQi; 3255 break; 3256 case AArch64::LDRBBui: 3257 Scale = 1; 3258 UnscaledOp = AArch64::LDURBBi; 3259 break; 3260 case AArch64::LDRHHui: 3261 Scale = 2; 3262 UnscaledOp = AArch64::LDURHHi; 3263 break; 3264 case AArch64::LDRSBXui: 3265 Scale = 1; 3266 UnscaledOp = AArch64::LDURSBXi; 3267 break; 3268 case AArch64::LDRSBWui: 3269 Scale = 1; 3270 UnscaledOp = AArch64::LDURSBWi; 3271 break; 3272 case AArch64::LDRSHXui: 3273 Scale = 2; 3274 UnscaledOp = AArch64::LDURSHXi; 3275 break; 3276 case AArch64::LDRSHWui: 3277 Scale = 2; 3278 UnscaledOp = AArch64::LDURSHWi; 3279 break; 3280 case AArch64::LDRSWui: 3281 Scale = 4; 3282 UnscaledOp = AArch64::LDURSWi; 3283 break; 3284 3285 case AArch64::STRXui: 3286 Scale = 8; 3287 UnscaledOp = AArch64::STURXi; 3288 break; 3289 case AArch64::STRWui: 3290 Scale = 4; 3291 UnscaledOp = AArch64::STURWi; 3292 break; 3293 case AArch64::STRBui: 3294 Scale = 1; 3295 UnscaledOp = AArch64::STURBi; 3296 break; 3297 case AArch64::STRHui: 3298 Scale = 2; 3299 UnscaledOp = AArch64::STURHi; 3300 break; 3301 case AArch64::STRSui: 3302 Scale = 4; 3303 UnscaledOp = AArch64::STURSi; 3304 break; 3305 case AArch64::STRDui: 3306 Scale = 8; 3307 UnscaledOp = AArch64::STURDi; 3308 
break; 3309 case AArch64::STRQui: 3310 Scale = 16; 3311 UnscaledOp = AArch64::STURQi; 3312 break; 3313 case AArch64::STRBBui: 3314 Scale = 1; 3315 UnscaledOp = AArch64::STURBBi; 3316 break; 3317 case AArch64::STRHHui: 3318 Scale = 2; 3319 UnscaledOp = AArch64::STURHHi; 3320 break; 3321 3322 case AArch64::LDPXi: 3323 case AArch64::LDPDi: 3324 case AArch64::STPXi: 3325 case AArch64::STPDi: 3326 case AArch64::LDNPXi: 3327 case AArch64::LDNPDi: 3328 case AArch64::STNPXi: 3329 case AArch64::STNPDi: 3330 ImmIdx = 3; 3331 IsSigned = true; 3332 Scale = 8; 3333 break; 3334 case AArch64::LDPQi: 3335 case AArch64::STPQi: 3336 case AArch64::LDNPQi: 3337 case AArch64::STNPQi: 3338 ImmIdx = 3; 3339 IsSigned = true; 3340 Scale = 16; 3341 break; 3342 case AArch64::LDPWi: 3343 case AArch64::LDPSi: 3344 case AArch64::STPWi: 3345 case AArch64::STPSi: 3346 case AArch64::LDNPWi: 3347 case AArch64::LDNPSi: 3348 case AArch64::STNPWi: 3349 case AArch64::STNPSi: 3350 ImmIdx = 3; 3351 IsSigned = true; 3352 Scale = 4; 3353 break; 3354 3355 case AArch64::LDURXi: 3356 case AArch64::LDURWi: 3357 case AArch64::LDURBi: 3358 case AArch64::LDURHi: 3359 case AArch64::LDURSi: 3360 case AArch64::LDURDi: 3361 case AArch64::LDURQi: 3362 case AArch64::LDURHHi: 3363 case AArch64::LDURBBi: 3364 case AArch64::LDURSBXi: 3365 case AArch64::LDURSBWi: 3366 case AArch64::LDURSHXi: 3367 case AArch64::LDURSHWi: 3368 case AArch64::LDURSWi: 3369 case AArch64::STURXi: 3370 case AArch64::STURWi: 3371 case AArch64::STURBi: 3372 case AArch64::STURHi: 3373 case AArch64::STURSi: 3374 case AArch64::STURDi: 3375 case AArch64::STURQi: 3376 case AArch64::STURBBi: 3377 case AArch64::STURHHi: 3378 Scale = 1; 3379 break; 3380 } 3381 3382 Offset += MI.getOperand(ImmIdx).getImm() * Scale; 3383 3384 bool useUnscaledOp = false; 3385 // If the offset doesn't match the scale, we rewrite the instruction to 3386 // use the unscaled instruction instead. Likewise, if we have a negative 3387 // offset (and have an unscaled op to use). 3388 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) 3389 useUnscaledOp = true; 3390 3391 // Use an unscaled addressing mode if the instruction has a negative offset 3392 // (or if the instruction is already using an unscaled addressing mode). 3393 unsigned MaskBits; 3394 if (IsSigned) { 3395 // ldp/stp instructions. 3396 MaskBits = 7; 3397 Offset /= Scale; 3398 } else if (UnscaledOp == 0 || useUnscaledOp) { 3399 MaskBits = 9; 3400 IsSigned = true; 3401 Scale = 1; 3402 } else { 3403 MaskBits = 12; 3404 IsSigned = false; 3405 Offset /= Scale; 3406 } 3407 3408 // Attempt to fold address computation. 3409 int MaxOff = (1 << (MaskBits - IsSigned)) - 1; 3410 int MinOff = (IsSigned ? (-MaxOff - 1) : 0); 3411 if (Offset >= MinOff && Offset <= MaxOff) { 3412 if (EmittableOffset) 3413 *EmittableOffset = Offset; 3414 Offset = 0; 3415 } else { 3416 int NewOff = Offset < 0 ? MinOff : MaxOff; 3417 if (EmittableOffset) 3418 *EmittableOffset = NewOff; 3419 Offset = (Offset - NewOff) * Scale; 3420 } 3421 if (OutUseUnscaledOp) 3422 *OutUseUnscaledOp = useUnscaledOp; 3423 if (OutUnscaledOp) 3424 *OutUnscaledOp = UnscaledOp; 3425 return AArch64FrameOffsetCanUpdate | 3426 (Offset == 0 ? 
AArch64FrameOffsetIsLegal : 0); 3427 } 3428 3429 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 3430 unsigned FrameReg, int &Offset, 3431 const AArch64InstrInfo *TII) { 3432 unsigned Opcode = MI.getOpcode(); 3433 unsigned ImmIdx = FrameRegIdx + 1; 3434 3435 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 3436 Offset += MI.getOperand(ImmIdx).getImm(); 3437 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 3438 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 3439 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 3440 MI.eraseFromParent(); 3441 Offset = 0; 3442 return true; 3443 } 3444 3445 int NewOffset; 3446 unsigned UnscaledOp; 3447 bool UseUnscaledOp; 3448 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 3449 &UnscaledOp, &NewOffset); 3450 if (Status & AArch64FrameOffsetCanUpdate) { 3451 if (Status & AArch64FrameOffsetIsLegal) 3452 // Replace the FrameIndex with FrameReg. 3453 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 3454 if (UseUnscaledOp) 3455 MI.setDesc(TII->get(UnscaledOp)); 3456 3457 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 3458 return Offset == 0; 3459 } 3460 3461 return false; 3462 } 3463 3464 void AArch64InstrInfo::getNoop(MCInst &NopInst) const { 3465 NopInst.setOpcode(AArch64::HINT); 3466 NopInst.addOperand(MCOperand::createImm(0)); 3467 } 3468 3469 // AArch64 supports MachineCombiner. 3470 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 3471 3472 // True when Opc sets flag 3473 static bool isCombineInstrSettingFlag(unsigned Opc) { 3474 switch (Opc) { 3475 case AArch64::ADDSWrr: 3476 case AArch64::ADDSWri: 3477 case AArch64::ADDSXrr: 3478 case AArch64::ADDSXri: 3479 case AArch64::SUBSWrr: 3480 case AArch64::SUBSXrr: 3481 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3482 case AArch64::SUBSWri: 3483 case AArch64::SUBSXri: 3484 return true; 3485 default: 3486 break; 3487 } 3488 return false; 3489 } 3490 3491 // 32b Opcodes that can be combined with a MUL 3492 static bool isCombineInstrCandidate32(unsigned Opc) { 3493 switch (Opc) { 3494 case AArch64::ADDWrr: 3495 case AArch64::ADDWri: 3496 case AArch64::SUBWrr: 3497 case AArch64::ADDSWrr: 3498 case AArch64::ADDSWri: 3499 case AArch64::SUBSWrr: 3500 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3501 case AArch64::SUBWri: 3502 case AArch64::SUBSWri: 3503 return true; 3504 default: 3505 break; 3506 } 3507 return false; 3508 } 3509 3510 // 64b Opcodes that can be combined with a MUL 3511 static bool isCombineInstrCandidate64(unsigned Opc) { 3512 switch (Opc) { 3513 case AArch64::ADDXrr: 3514 case AArch64::ADDXri: 3515 case AArch64::SUBXrr: 3516 case AArch64::ADDSXrr: 3517 case AArch64::ADDSXri: 3518 case AArch64::SUBSXrr: 3519 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks whether \param MO is defined by a
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the instruction we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can it be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can it be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3.
Other forms of the same operation (intrinsics and other variants) 3604 bool AArch64InstrInfo::isAssociativeAndCommutative( 3605 const MachineInstr &Inst) const { 3606 switch (Inst.getOpcode()) { 3607 case AArch64::FADDDrr: 3608 case AArch64::FADDSrr: 3609 case AArch64::FADDv2f32: 3610 case AArch64::FADDv2f64: 3611 case AArch64::FADDv4f32: 3612 case AArch64::FMULDrr: 3613 case AArch64::FMULSrr: 3614 case AArch64::FMULX32: 3615 case AArch64::FMULX64: 3616 case AArch64::FMULXv2f32: 3617 case AArch64::FMULXv2f64: 3618 case AArch64::FMULXv4f32: 3619 case AArch64::FMULv2f32: 3620 case AArch64::FMULv2f64: 3621 case AArch64::FMULv4f32: 3622 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 3623 default: 3624 return false; 3625 } 3626 } 3627 3628 /// Find instructions that can be turned into madd. 3629 static bool getMaddPatterns(MachineInstr &Root, 3630 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3631 unsigned Opc = Root.getOpcode(); 3632 MachineBasicBlock &MBB = *Root.getParent(); 3633 bool Found = false; 3634 3635 if (!isCombineInstrCandidate(Opc)) 3636 return false; 3637 if (isCombineInstrSettingFlag(Opc)) { 3638 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 3639 // When NZCV is live bail out. 3640 if (Cmp_NZCV == -1) 3641 return false; 3642 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 3643 // When opcode can't change bail out. 3644 // CHECKME: do we miss any cases for opcode conversion? 3645 if (NewOpc == Opc) 3646 return false; 3647 Opc = NewOpc; 3648 } 3649 3650 switch (Opc) { 3651 default: 3652 break; 3653 case AArch64::ADDWrr: 3654 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3655 "ADDWrr does not have register operands"); 3656 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3657 AArch64::WZR)) { 3658 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); 3659 Found = true; 3660 } 3661 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3662 AArch64::WZR)) { 3663 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); 3664 Found = true; 3665 } 3666 break; 3667 case AArch64::ADDXrr: 3668 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3669 AArch64::XZR)) { 3670 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); 3671 Found = true; 3672 } 3673 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3674 AArch64::XZR)) { 3675 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); 3676 Found = true; 3677 } 3678 break; 3679 case AArch64::SUBWrr: 3680 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3681 AArch64::WZR)) { 3682 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); 3683 Found = true; 3684 } 3685 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3686 AArch64::WZR)) { 3687 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); 3688 Found = true; 3689 } 3690 break; 3691 case AArch64::SUBXrr: 3692 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3693 AArch64::XZR)) { 3694 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); 3695 Found = true; 3696 } 3697 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3698 AArch64::XZR)) { 3699 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); 3700 Found = true; 3701 } 3702 break; 3703 case AArch64::ADDWri: 3704 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3705 AArch64::WZR)) { 3706 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); 3707 Found = true; 3708 } 3709 break; 3710 case 
AArch64::ADDXri: 3711 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3712 AArch64::XZR)) { 3713 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); 3714 Found = true; 3715 } 3716 break; 3717 case AArch64::SUBWri: 3718 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3719 AArch64::WZR)) { 3720 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); 3721 Found = true; 3722 } 3723 break; 3724 case AArch64::SUBXri: 3725 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3726 AArch64::XZR)) { 3727 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); 3728 Found = true; 3729 } 3730 break; 3731 } 3732 return Found; 3733 } 3734 /// Floating-Point Support 3735 3736 /// Find instructions that can be turned into madd. 3737 static bool getFMAPatterns(MachineInstr &Root, 3738 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3739 3740 if (!isCombineInstrCandidateFP(Root)) 3741 return false; 3742 3743 MachineBasicBlock &MBB = *Root.getParent(); 3744 bool Found = false; 3745 3746 switch (Root.getOpcode()) { 3747 default: 3748 assert(false && "Unsupported FP instruction in combiner\n"); 3749 break; 3750 case AArch64::FADDSrr: 3751 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3752 "FADDWrr does not have register operands"); 3753 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3754 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); 3755 Found = true; 3756 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3757 AArch64::FMULv1i32_indexed)) { 3758 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); 3759 Found = true; 3760 } 3761 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3762 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); 3763 Found = true; 3764 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3765 AArch64::FMULv1i32_indexed)) { 3766 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); 3767 Found = true; 3768 } 3769 break; 3770 case AArch64::FADDDrr: 3771 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3772 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); 3773 Found = true; 3774 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3775 AArch64::FMULv1i64_indexed)) { 3776 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); 3777 Found = true; 3778 } 3779 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3780 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); 3781 Found = true; 3782 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3783 AArch64::FMULv1i64_indexed)) { 3784 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); 3785 Found = true; 3786 } 3787 break; 3788 case AArch64::FADDv2f32: 3789 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3790 AArch64::FMULv2i32_indexed)) { 3791 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); 3792 Found = true; 3793 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3794 AArch64::FMULv2f32)) { 3795 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); 3796 Found = true; 3797 } 3798 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3799 AArch64::FMULv2i32_indexed)) { 3800 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); 3801 Found = true; 3802 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3803 AArch64::FMULv2f32)) { 3804 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); 3805 Found = true; 3806 } 3807 break; 3808 case AArch64::FADDv2f64: 
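    // Same idea as the FADDv2f32 case above: prefer the lane-indexed FMLA form
    // when the feeding multiply is an indexed FMUL, otherwise fall back to the
    // accumulator form.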
3809 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3810 AArch64::FMULv2i64_indexed)) { 3811 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); 3812 Found = true; 3813 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3814 AArch64::FMULv2f64)) { 3815 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); 3816 Found = true; 3817 } 3818 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3819 AArch64::FMULv2i64_indexed)) { 3820 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); 3821 Found = true; 3822 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3823 AArch64::FMULv2f64)) { 3824 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); 3825 Found = true; 3826 } 3827 break; 3828 case AArch64::FADDv4f32: 3829 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3830 AArch64::FMULv4i32_indexed)) { 3831 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); 3832 Found = true; 3833 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3834 AArch64::FMULv4f32)) { 3835 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); 3836 Found = true; 3837 } 3838 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3839 AArch64::FMULv4i32_indexed)) { 3840 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); 3841 Found = true; 3842 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3843 AArch64::FMULv4f32)) { 3844 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); 3845 Found = true; 3846 } 3847 break; 3848 3849 case AArch64::FSUBSrr: 3850 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3851 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); 3852 Found = true; 3853 } 3854 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3855 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); 3856 Found = true; 3857 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3858 AArch64::FMULv1i32_indexed)) { 3859 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); 3860 Found = true; 3861 } 3862 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { 3863 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); 3864 Found = true; 3865 } 3866 break; 3867 case AArch64::FSUBDrr: 3868 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3869 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); 3870 Found = true; 3871 } 3872 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3873 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); 3874 Found = true; 3875 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3876 AArch64::FMULv1i64_indexed)) { 3877 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); 3878 Found = true; 3879 } 3880 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { 3881 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); 3882 Found = true; 3883 } 3884 break; 3885 case AArch64::FSUBv2f32: 3886 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3887 AArch64::FMULv2i32_indexed)) { 3888 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); 3889 Found = true; 3890 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3891 AArch64::FMULv2f32)) { 3892 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); 3893 Found = true; 3894 } 3895 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3896 AArch64::FMULv2i32_indexed)) { 3897 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); 3898 Found = true; 3899 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 
3900 AArch64::FMULv2f32)) { 3901 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); 3902 Found = true; 3903 } 3904 break; 3905 case AArch64::FSUBv2f64: 3906 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3907 AArch64::FMULv2i64_indexed)) { 3908 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); 3909 Found = true; 3910 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3911 AArch64::FMULv2f64)) { 3912 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); 3913 Found = true; 3914 } 3915 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3916 AArch64::FMULv2i64_indexed)) { 3917 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); 3918 Found = true; 3919 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3920 AArch64::FMULv2f64)) { 3921 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); 3922 Found = true; 3923 } 3924 break; 3925 case AArch64::FSUBv4f32: 3926 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3927 AArch64::FMULv4i32_indexed)) { 3928 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); 3929 Found = true; 3930 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3931 AArch64::FMULv4f32)) { 3932 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); 3933 Found = true; 3934 } 3935 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3936 AArch64::FMULv4i32_indexed)) { 3937 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); 3938 Found = true; 3939 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3940 AArch64::FMULv4f32)) { 3941 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); 3942 Found = true; 3943 } 3944 break; 3945 } 3946 return Found; 3947 } 3948 3949 /// Return true when a code sequence can improve throughput. It 3950 /// should be called only for instructions in loops. 
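/// For example (the registers here are illustrative only), rewriting
///   fmul v8.2s, v9.2s, v10.2s
///   fadd v11.2s, v11.2s, v8.2s
/// as a single
///   fmla v11.2s, v9.2s, v10.2s
/// does not necessarily shorten the dependence chain, but it retires fewer
/// instructions, which is why the FMA-style patterns below are reported as
/// throughput patterns rather than latency (critical path) improvements.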
3951 /// \param Pattern - combiner pattern 3952 bool AArch64InstrInfo::isThroughputPattern( 3953 MachineCombinerPattern Pattern) const { 3954 switch (Pattern) { 3955 default: 3956 break; 3957 case MachineCombinerPattern::FMULADDS_OP1: 3958 case MachineCombinerPattern::FMULADDS_OP2: 3959 case MachineCombinerPattern::FMULSUBS_OP1: 3960 case MachineCombinerPattern::FMULSUBS_OP2: 3961 case MachineCombinerPattern::FMULADDD_OP1: 3962 case MachineCombinerPattern::FMULADDD_OP2: 3963 case MachineCombinerPattern::FMULSUBD_OP1: 3964 case MachineCombinerPattern::FMULSUBD_OP2: 3965 case MachineCombinerPattern::FNMULSUBS_OP1: 3966 case MachineCombinerPattern::FNMULSUBD_OP1: 3967 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 3968 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 3969 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 3970 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 3971 case MachineCombinerPattern::FMLAv2f32_OP2: 3972 case MachineCombinerPattern::FMLAv2f32_OP1: 3973 case MachineCombinerPattern::FMLAv2f64_OP1: 3974 case MachineCombinerPattern::FMLAv2f64_OP2: 3975 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 3976 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 3977 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 3978 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 3979 case MachineCombinerPattern::FMLAv4f32_OP1: 3980 case MachineCombinerPattern::FMLAv4f32_OP2: 3981 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 3982 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 3983 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 3984 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 3985 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 3986 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 3987 case MachineCombinerPattern::FMLSv2f32_OP2: 3988 case MachineCombinerPattern::FMLSv2f64_OP2: 3989 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 3990 case MachineCombinerPattern::FMLSv4f32_OP2: 3991 return true; 3992 } // end switch (Pattern) 3993 return false; 3994 } 3995 /// Return true when there is potentially a faster code sequence for an 3996 /// instruction chain ending in \p Root. All potential patterns are listed in 3997 /// the \p Pattern vector. Pattern should be sorted in priority order since the 3998 /// pattern evaluator stops checking as soon as it finds a faster sequence. 3999 4000 bool AArch64InstrInfo::getMachineCombinerPatterns( 4001 MachineInstr &Root, 4002 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 4003 // Integer patterns 4004 if (getMaddPatterns(Root, Patterns)) 4005 return true; 4006 // Floating point patterns 4007 if (getFMAPatterns(Root, Patterns)) 4008 return true; 4009 4010 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 4011 } 4012 4013 enum class FMAInstKind { Default, Indexed, Accumulator }; 4014 /// genFusedMultiply - Generate fused multiply instructions. 4015 /// This function supports both integer and floating point instructions. 4016 /// A typical example: 4017 /// F|MUL I=A,B,0 4018 /// F|ADD R,I,C 4019 /// ==> F|MADD R,A,B,C 4020 /// \param MF Containing MachineFunction 4021 /// \param MRI Register information 4022 /// \param TII Target information 4023 /// \param Root is the F|ADD instruction 4024 /// \param [out] InsInstrs is a vector of machine instructions and will 4025 /// contain the generated madd instruction 4026 /// \param IdxMulOpd is index of operand in Root that is the result of 4027 /// the F|MUL. In the example above IdxMulOpd is 1. 
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind The kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const unsigned *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
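    // The generic TargetInstrInfo implementation handles the reassociation
    // patterns (e.g. rewriting ((a + b) + c) + d as (a + b) + (c + d) so the
    // two halves can be computed in parallel); only the MADD/FMA-style
    // patterns are expanded by the target-specific cases below.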
4165 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 4166 DelInstrs, InstrIdxForVirtReg); 4167 return; 4168 case MachineCombinerPattern::MULADDW_OP1: 4169 case MachineCombinerPattern::MULADDX_OP1: 4170 // MUL I=A,B,0 4171 // ADD R,I,C 4172 // ==> MADD R,A,B,C 4173 // --- Create(MADD); 4174 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 4175 Opc = AArch64::MADDWrrr; 4176 RC = &AArch64::GPR32RegClass; 4177 } else { 4178 Opc = AArch64::MADDXrrr; 4179 RC = &AArch64::GPR64RegClass; 4180 } 4181 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4182 break; 4183 case MachineCombinerPattern::MULADDW_OP2: 4184 case MachineCombinerPattern::MULADDX_OP2: 4185 // MUL I=A,B,0 4186 // ADD R,C,I 4187 // ==> MADD R,A,B,C 4188 // --- Create(MADD); 4189 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 4190 Opc = AArch64::MADDWrrr; 4191 RC = &AArch64::GPR32RegClass; 4192 } else { 4193 Opc = AArch64::MADDXrrr; 4194 RC = &AArch64::GPR64RegClass; 4195 } 4196 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4197 break; 4198 case MachineCombinerPattern::MULADDWI_OP1: 4199 case MachineCombinerPattern::MULADDXI_OP1: { 4200 // MUL I=A,B,0 4201 // ADD R,I,Imm 4202 // ==> ORR V, ZR, Imm 4203 // ==> MADD R,A,B,V 4204 // --- Create(MADD); 4205 const TargetRegisterClass *OrrRC; 4206 unsigned BitSize, OrrOpc, ZeroReg; 4207 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 4208 OrrOpc = AArch64::ORRWri; 4209 OrrRC = &AArch64::GPR32spRegClass; 4210 BitSize = 32; 4211 ZeroReg = AArch64::WZR; 4212 Opc = AArch64::MADDWrrr; 4213 RC = &AArch64::GPR32RegClass; 4214 } else { 4215 OrrOpc = AArch64::ORRXri; 4216 OrrRC = &AArch64::GPR64spRegClass; 4217 BitSize = 64; 4218 ZeroReg = AArch64::XZR; 4219 Opc = AArch64::MADDXrrr; 4220 RC = &AArch64::GPR64RegClass; 4221 } 4222 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 4223 uint64_t Imm = Root.getOperand(2).getImm(); 4224 4225 if (Root.getOperand(3).isImm()) { 4226 unsigned Val = Root.getOperand(3).getImm(); 4227 Imm = Imm << Val; 4228 } 4229 uint64_t UImm = SignExtend64(Imm, BitSize); 4230 uint64_t Encoding; 4231 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4232 MachineInstrBuilder MIB1 = 4233 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4234 .addReg(ZeroReg) 4235 .addImm(Encoding); 4236 InsInstrs.push_back(MIB1); 4237 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4238 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4239 } 4240 break; 4241 } 4242 case MachineCombinerPattern::MULSUBW_OP1: 4243 case MachineCombinerPattern::MULSUBX_OP1: { 4244 // MUL I=A,B,0 4245 // SUB R,I, C 4246 // ==> SUB V, 0, C 4247 // ==> MADD R,A,B,V // = -C + A*B 4248 // --- Create(MADD); 4249 const TargetRegisterClass *SubRC; 4250 unsigned SubOpc, ZeroReg; 4251 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 4252 SubOpc = AArch64::SUBWrr; 4253 SubRC = &AArch64::GPR32spRegClass; 4254 ZeroReg = AArch64::WZR; 4255 Opc = AArch64::MADDWrrr; 4256 RC = &AArch64::GPR32RegClass; 4257 } else { 4258 SubOpc = AArch64::SUBXrr; 4259 SubRC = &AArch64::GPR64spRegClass; 4260 ZeroReg = AArch64::XZR; 4261 Opc = AArch64::MADDXrrr; 4262 RC = &AArch64::GPR64RegClass; 4263 } 4264 unsigned NewVR = MRI.createVirtualRegister(SubRC); 4265 // SUB NewVR, 0, C 4266 MachineInstrBuilder MIB1 = 4267 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 4268 .addReg(ZeroReg) 4269 .add(Root.getOperand(2)); 4270 InsInstrs.push_back(MIB1); 4271 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4272 MUL 
= genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4273 break; 4274 } 4275 case MachineCombinerPattern::MULSUBW_OP2: 4276 case MachineCombinerPattern::MULSUBX_OP2: 4277 // MUL I=A,B,0 4278 // SUB R,C,I 4279 // ==> MSUB R,A,B,C (computes C - A*B) 4280 // --- Create(MSUB); 4281 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 4282 Opc = AArch64::MSUBWrrr; 4283 RC = &AArch64::GPR32RegClass; 4284 } else { 4285 Opc = AArch64::MSUBXrrr; 4286 RC = &AArch64::GPR64RegClass; 4287 } 4288 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4289 break; 4290 case MachineCombinerPattern::MULSUBWI_OP1: 4291 case MachineCombinerPattern::MULSUBXI_OP1: { 4292 // MUL I=A,B,0 4293 // SUB R,I, Imm 4294 // ==> ORR V, ZR, -Imm 4295 // ==> MADD R,A,B,V // = -Imm + A*B 4296 // --- Create(MADD); 4297 const TargetRegisterClass *OrrRC; 4298 unsigned BitSize, OrrOpc, ZeroReg; 4299 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 4300 OrrOpc = AArch64::ORRWri; 4301 OrrRC = &AArch64::GPR32spRegClass; 4302 BitSize = 32; 4303 ZeroReg = AArch64::WZR; 4304 Opc = AArch64::MADDWrrr; 4305 RC = &AArch64::GPR32RegClass; 4306 } else { 4307 OrrOpc = AArch64::ORRXri; 4308 OrrRC = &AArch64::GPR64spRegClass; 4309 BitSize = 64; 4310 ZeroReg = AArch64::XZR; 4311 Opc = AArch64::MADDXrrr; 4312 RC = &AArch64::GPR64RegClass; 4313 } 4314 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 4315 uint64_t Imm = Root.getOperand(2).getImm(); 4316 if (Root.getOperand(3).isImm()) { 4317 unsigned Val = Root.getOperand(3).getImm(); 4318 Imm = Imm << Val; 4319 } 4320 uint64_t UImm = SignExtend64(-Imm, BitSize); 4321 uint64_t Encoding; 4322 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4323 MachineInstrBuilder MIB1 = 4324 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4325 .addReg(ZeroReg) 4326 .addImm(Encoding); 4327 InsInstrs.push_back(MIB1); 4328 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4329 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4330 } 4331 break; 4332 } 4333 // Floating Point Support 4334 case MachineCombinerPattern::FMULADDS_OP1: 4335 case MachineCombinerPattern::FMULADDD_OP1: 4336 // MUL I=A,B,0 4337 // ADD R,I,C 4338 // ==> MADD R,A,B,C 4339 // --- Create(MADD); 4340 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { 4341 Opc = AArch64::FMADDSrrr; 4342 RC = &AArch64::FPR32RegClass; 4343 } else { 4344 Opc = AArch64::FMADDDrrr; 4345 RC = &AArch64::FPR64RegClass; 4346 } 4347 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4348 break; 4349 case MachineCombinerPattern::FMULADDS_OP2: 4350 case MachineCombinerPattern::FMULADDD_OP2: 4351 // FMUL I=A,B,0 4352 // FADD R,C,I 4353 // ==> FMADD R,A,B,C 4354 // --- Create(FMADD); 4355 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { 4356 Opc = AArch64::FMADDSrrr; 4357 RC = &AArch64::FPR32RegClass; 4358 } else { 4359 Opc = AArch64::FMADDDrrr; 4360 RC = &AArch64::FPR64RegClass; 4361 } 4362 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4363 break; 4364 4365 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4366 Opc = AArch64::FMLAv1i32_indexed; 4367 RC = &AArch64::FPR32RegClass; 4368 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4369 FMAInstKind::Indexed); 4370 break; 4371 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4372 Opc = AArch64::FMLAv1i32_indexed; 4373 RC = &AArch64::FPR32RegClass; 4374 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4375 FMAInstKind::Indexed); 4376 break; 4377 4378 case 
MachineCombinerPattern::FMLAv1i64_indexed_OP1: 4379 Opc = AArch64::FMLAv1i64_indexed; 4380 RC = &AArch64::FPR64RegClass; 4381 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4382 FMAInstKind::Indexed); 4383 break; 4384 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 4385 Opc = AArch64::FMLAv1i64_indexed; 4386 RC = &AArch64::FPR64RegClass; 4387 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4388 FMAInstKind::Indexed); 4389 break; 4390 4391 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 4392 case MachineCombinerPattern::FMLAv2f32_OP1: 4393 RC = &AArch64::FPR64RegClass; 4394 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 4395 Opc = AArch64::FMLAv2i32_indexed; 4396 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4397 FMAInstKind::Indexed); 4398 } else { 4399 Opc = AArch64::FMLAv2f32; 4400 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4401 FMAInstKind::Accumulator); 4402 } 4403 break; 4404 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 4405 case MachineCombinerPattern::FMLAv2f32_OP2: 4406 RC = &AArch64::FPR64RegClass; 4407 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 4408 Opc = AArch64::FMLAv2i32_indexed; 4409 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4410 FMAInstKind::Indexed); 4411 } else { 4412 Opc = AArch64::FMLAv2f32; 4413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4414 FMAInstKind::Accumulator); 4415 } 4416 break; 4417 4418 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 4419 case MachineCombinerPattern::FMLAv2f64_OP1: 4420 RC = &AArch64::FPR128RegClass; 4421 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 4422 Opc = AArch64::FMLAv2i64_indexed; 4423 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4424 FMAInstKind::Indexed); 4425 } else { 4426 Opc = AArch64::FMLAv2f64; 4427 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4428 FMAInstKind::Accumulator); 4429 } 4430 break; 4431 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 4432 case MachineCombinerPattern::FMLAv2f64_OP2: 4433 RC = &AArch64::FPR128RegClass; 4434 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 4435 Opc = AArch64::FMLAv2i64_indexed; 4436 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4437 FMAInstKind::Indexed); 4438 } else { 4439 Opc = AArch64::FMLAv2f64; 4440 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4441 FMAInstKind::Accumulator); 4442 } 4443 break; 4444 4445 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 4446 case MachineCombinerPattern::FMLAv4f32_OP1: 4447 RC = &AArch64::FPR128RegClass; 4448 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 4449 Opc = AArch64::FMLAv4i32_indexed; 4450 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4451 FMAInstKind::Indexed); 4452 } else { 4453 Opc = AArch64::FMLAv4f32; 4454 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4455 FMAInstKind::Accumulator); 4456 } 4457 break; 4458 4459 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 4460 case MachineCombinerPattern::FMLAv4f32_OP2: 4461 RC = &AArch64::FPR128RegClass; 4462 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 4463 Opc = AArch64::FMLAv4i32_indexed; 4464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4465 FMAInstKind::Indexed); 4466 } else { 4467 Opc = AArch64::FMLAv4f32; 4468 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4469 
FMAInstKind::Accumulator); 4470 } 4471 break; 4472 4473 case MachineCombinerPattern::FMULSUBS_OP1: 4474 case MachineCombinerPattern::FMULSUBD_OP1: { 4475 // FMUL I=A,B,0 4476 // FSUB R,I,C 4477 // ==> FNMSUB R,A,B,C // = -C + A*B 4478 // --- Create(FNMSUB); 4479 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { 4480 Opc = AArch64::FNMSUBSrrr; 4481 RC = &AArch64::FPR32RegClass; 4482 } else { 4483 Opc = AArch64::FNMSUBDrrr; 4484 RC = &AArch64::FPR64RegClass; 4485 } 4486 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4487 break; 4488 } 4489 4490 case MachineCombinerPattern::FNMULSUBS_OP1: 4491 case MachineCombinerPattern::FNMULSUBD_OP1: { 4492 // FNMUL I=A,B,0 4493 // FSUB R,I,C 4494 // ==> FNMADD R,A,B,C // = -A*B - C 4495 // --- Create(FNMADD); 4496 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { 4497 Opc = AArch64::FNMADDSrrr; 4498 RC = &AArch64::FPR32RegClass; 4499 } else { 4500 Opc = AArch64::FNMADDDrrr; 4501 RC = &AArch64::FPR64RegClass; 4502 } 4503 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4504 break; 4505 } 4506 4507 case MachineCombinerPattern::FMULSUBS_OP2: 4508 case MachineCombinerPattern::FMULSUBD_OP2: { 4509 // FMUL I=A,B,0 4510 // FSUB R,C,I 4511 // ==> FMSUB R,A,B,C (computes C - A*B) 4512 // --- Create(FMSUB); 4513 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { 4514 Opc = AArch64::FMSUBSrrr; 4515 RC = &AArch64::FPR32RegClass; 4516 } else { 4517 Opc = AArch64::FMSUBDrrr; 4518 RC = &AArch64::FPR64RegClass; 4519 } 4520 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4521 break; 4522 } 4523 4524 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 4525 Opc = AArch64::FMLSv1i32_indexed; 4526 RC = &AArch64::FPR32RegClass; 4527 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4528 FMAInstKind::Indexed); 4529 break; 4530 4531 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 4532 Opc = AArch64::FMLSv1i64_indexed; 4533 RC = &AArch64::FPR64RegClass; 4534 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4535 FMAInstKind::Indexed); 4536 break; 4537 4538 case MachineCombinerPattern::FMLSv2f32_OP2: 4539 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 4540 RC = &AArch64::FPR64RegClass; 4541 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 4542 Opc = AArch64::FMLSv2i32_indexed; 4543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4544 FMAInstKind::Indexed); 4545 } else { 4546 Opc = AArch64::FMLSv2f32; 4547 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4548 FMAInstKind::Accumulator); 4549 } 4550 break; 4551 4552 case MachineCombinerPattern::FMLSv2f64_OP2: 4553 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 4554 RC = &AArch64::FPR128RegClass; 4555 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 4556 Opc = AArch64::FMLSv2i64_indexed; 4557 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4558 FMAInstKind::Indexed); 4559 } else { 4560 Opc = AArch64::FMLSv2f64; 4561 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4562 FMAInstKind::Accumulator); 4563 } 4564 break; 4565 4566 case MachineCombinerPattern::FMLSv4f32_OP2: 4567 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 4568 RC = &AArch64::FPR128RegClass; 4569 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 4570 Opc = AArch64::FMLSv4i32_indexed; 4571 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4572 FMAInstKind::Indexed); 4573 } else { 4574 Opc = AArch64::FMLSv4f32; 4575 MUL = 
genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4576 FMAInstKind::Accumulator); 4577 } 4578 break; 4579 case MachineCombinerPattern::FMLSv2f32_OP1: 4580 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 4581 RC = &AArch64::FPR64RegClass; 4582 unsigned NewVR = MRI.createVirtualRegister(RC); 4583 MachineInstrBuilder MIB1 = 4584 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 4585 .add(Root.getOperand(2)); 4586 InsInstrs.push_back(MIB1); 4587 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4588 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 4589 Opc = AArch64::FMLAv2i32_indexed; 4590 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4591 FMAInstKind::Indexed, &NewVR); 4592 } else { 4593 Opc = AArch64::FMLAv2f32; 4594 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4595 FMAInstKind::Accumulator, &NewVR); 4596 } 4597 break; 4598 } 4599 case MachineCombinerPattern::FMLSv4f32_OP1: 4600 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 4601 RC = &AArch64::FPR128RegClass; 4602 unsigned NewVR = MRI.createVirtualRegister(RC); 4603 MachineInstrBuilder MIB1 = 4604 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 4605 .add(Root.getOperand(2)); 4606 InsInstrs.push_back(MIB1); 4607 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4608 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 4609 Opc = AArch64::FMLAv4i32_indexed; 4610 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4611 FMAInstKind::Indexed, &NewVR); 4612 } else { 4613 Opc = AArch64::FMLAv4f32; 4614 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4615 FMAInstKind::Accumulator, &NewVR); 4616 } 4617 break; 4618 } 4619 case MachineCombinerPattern::FMLSv2f64_OP1: 4620 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 4621 RC = &AArch64::FPR128RegClass; 4622 unsigned NewVR = MRI.createVirtualRegister(RC); 4623 MachineInstrBuilder MIB1 = 4624 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 4625 .add(Root.getOperand(2)); 4626 InsInstrs.push_back(MIB1); 4627 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4628 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 4629 Opc = AArch64::FMLAv2i64_indexed; 4630 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4631 FMAInstKind::Indexed, &NewVR); 4632 } else { 4633 Opc = AArch64::FMLAv2f64; 4634 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4635 FMAInstKind::Accumulator, &NewVR); 4636 } 4637 break; 4638 } 4639 } // end switch (Pattern) 4640 // Record MUL and ADD/SUB for deletion 4641 DelInstrs.push_back(MUL); 4642 DelInstrs.push_back(&Root); 4643 } 4644 4645 /// Replace csincr-branch sequence by simple conditional branch 4646 /// 4647 /// Examples: 4648 /// 1. \code 4649 /// csinc w9, wzr, wzr, <condition code> 4650 /// tbnz w9, #0, 0x44 4651 /// \endcode 4652 /// to 4653 /// \code 4654 /// b.<inverted condition code> 4655 /// \endcode 4656 /// 4657 /// 2. \code 4658 /// csinc w9, wzr, wzr, <condition code> 4659 /// tbz w9, #0, 0x44 4660 /// \endcode 4661 /// to 4662 /// \code 4663 /// b.<condition code> 4664 /// \endcode 4665 /// 4666 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the 4667 /// compare's constant operand is power of 2. 
4668 /// 4669 /// Examples: 4670 /// \code 4671 /// and w8, w8, #0x400 4672 /// cbnz w8, L1 4673 /// \endcode 4674 /// to 4675 /// \code 4676 /// tbnz w8, #10, L1 4677 /// \endcode 4678 /// 4679 /// \param MI Conditional Branch 4680 /// \return True when the simple conditional branch is generated 4681 /// 4682 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { 4683 bool IsNegativeBranch = false; 4684 bool IsTestAndBranch = false; 4685 unsigned TargetBBInMI = 0; 4686 switch (MI.getOpcode()) { 4687 default: 4688 llvm_unreachable("Unknown branch instruction?"); 4689 case AArch64::Bcc: 4690 return false; 4691 case AArch64::CBZW: 4692 case AArch64::CBZX: 4693 TargetBBInMI = 1; 4694 break; 4695 case AArch64::CBNZW: 4696 case AArch64::CBNZX: 4697 TargetBBInMI = 1; 4698 IsNegativeBranch = true; 4699 break; 4700 case AArch64::TBZW: 4701 case AArch64::TBZX: 4702 TargetBBInMI = 2; 4703 IsTestAndBranch = true; 4704 break; 4705 case AArch64::TBNZW: 4706 case AArch64::TBNZX: 4707 TargetBBInMI = 2; 4708 IsNegativeBranch = true; 4709 IsTestAndBranch = true; 4710 break; 4711 } 4712 // So we increment a zero register and test for bits other 4713 // than bit 0? Conservatively bail out in case the verifier 4714 // missed this case. 4715 if (IsTestAndBranch && MI.getOperand(1).getImm()) 4716 return false; 4717 4718 // Find Definition. 4719 assert(MI.getParent() && "Incomplete machine instruciton\n"); 4720 MachineBasicBlock *MBB = MI.getParent(); 4721 MachineFunction *MF = MBB->getParent(); 4722 MachineRegisterInfo *MRI = &MF->getRegInfo(); 4723 unsigned VReg = MI.getOperand(0).getReg(); 4724 if (!TargetRegisterInfo::isVirtualRegister(VReg)) 4725 return false; 4726 4727 MachineInstr *DefMI = MRI->getVRegDef(VReg); 4728 4729 // Look through COPY instructions to find definition. 4730 while (DefMI->isCopy()) { 4731 unsigned CopyVReg = DefMI->getOperand(1).getReg(); 4732 if (!MRI->hasOneNonDBGUse(CopyVReg)) 4733 return false; 4734 if (!MRI->hasOneDef(CopyVReg)) 4735 return false; 4736 DefMI = MRI->getVRegDef(CopyVReg); 4737 } 4738 4739 switch (DefMI->getOpcode()) { 4740 default: 4741 return false; 4742 // Fold AND into a TBZ/TBNZ if constant operand is power of 2. 4743 case AArch64::ANDWri: 4744 case AArch64::ANDXri: { 4745 if (IsTestAndBranch) 4746 return false; 4747 if (DefMI->getParent() != MBB) 4748 return false; 4749 if (!MRI->hasOneNonDBGUse(VReg)) 4750 return false; 4751 4752 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 4753 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 4754 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 4755 if (!isPowerOf2_64(Mask)) 4756 return false; 4757 4758 MachineOperand &MO = DefMI->getOperand(1); 4759 unsigned NewReg = MO.getReg(); 4760 if (!TargetRegisterInfo::isVirtualRegister(NewReg)) 4761 return false; 4762 4763 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 4764 4765 MachineBasicBlock &RefToMBB = *MBB; 4766 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 4767 DebugLoc DL = MI.getDebugLoc(); 4768 unsigned Imm = Log2_64(Mask); 4769 unsigned Opc = (Imm < 32) 4770 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 4771 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 4772 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 4773 .addReg(NewReg) 4774 .addImm(Imm) 4775 .addMBB(TBB); 4776 // Register lives on to the CBZ now. 4777 MO.setIsKill(false); 4778 4779 // For immediate smaller than 32, we need to use the 32-bit 4780 // variant (W) in all cases. 
Indeed the 64-bit variant does not 4781 // allow to encode them. 4782 // Therefore, if the input register is 64-bit, we need to take the 4783 // 32-bit sub-part. 4784 if (!Is32Bit && Imm < 32) 4785 NewMI->getOperand(0).setSubReg(AArch64::sub_32); 4786 MI.eraseFromParent(); 4787 return true; 4788 } 4789 // Look for CSINC 4790 case AArch64::CSINCWr: 4791 case AArch64::CSINCXr: { 4792 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && 4793 DefMI->getOperand(2).getReg() == AArch64::WZR) && 4794 !(DefMI->getOperand(1).getReg() == AArch64::XZR && 4795 DefMI->getOperand(2).getReg() == AArch64::XZR)) 4796 return false; 4797 4798 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) 4799 return false; 4800 4801 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); 4802 // Convert only when the condition code is not modified between 4803 // the CSINC and the branch. The CC may be used by other 4804 // instructions in between. 4805 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write)) 4806 return false; 4807 MachineBasicBlock &RefToMBB = *MBB; 4808 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB(); 4809 DebugLoc DL = MI.getDebugLoc(); 4810 if (IsNegativeBranch) 4811 CC = AArch64CC::getInvertedCondCode(CC); 4812 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); 4813 MI.eraseFromParent(); 4814 return true; 4815 } 4816 } 4817 } 4818 4819 std::pair<unsigned, unsigned> 4820 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 4821 const unsigned Mask = AArch64II::MO_FRAGMENT; 4822 return std::make_pair(TF & Mask, TF & ~Mask); 4823 } 4824 4825 ArrayRef<std::pair<unsigned, const char *>> 4826 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 4827 using namespace AArch64II; 4828 4829 static const std::pair<unsigned, const char *> TargetFlags[] = { 4830 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, 4831 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, 4832 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, 4833 {MO_HI12, "aarch64-hi12"}}; 4834 return makeArrayRef(TargetFlags); 4835 } 4836 4837 ArrayRef<std::pair<unsigned, const char *>> 4838 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 4839 using namespace AArch64II; 4840 4841 static const std::pair<unsigned, const char *> TargetFlags[] = { 4842 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}}; 4843 return makeArrayRef(TargetFlags); 4844 } 4845 4846 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 4847 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { 4848 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 4849 {{MOSuppressPair, "aarch64-suppress-pair"}, 4850 {MOStridedAccess, "aarch64-strided-access"}}; 4851 return makeArrayRef(TargetFlags); 4852 } 4853 4854 /// Constants defining how certain sequences should be outlined. 4855 /// This encompasses how an outlined function should be called, and what kind of 4856 /// frame should be emitted for that outlined function. 4857 /// 4858 /// \p MachineOutlinerDefault implies that the function should be called with 4859 /// a save and restore of LR to the stack. 
4860 /// 4861 /// That is, 4862 /// 4863 /// I1 Save LR OUTLINED_FUNCTION: 4864 /// I2 --> BL OUTLINED_FUNCTION I1 4865 /// I3 Restore LR I2 4866 /// I3 4867 /// RET 4868 /// 4869 /// * Call construction overhead: 3 (save + BL + restore) 4870 /// * Frame construction overhead: 1 (ret) 4871 /// * Requires stack fixups? Yes 4872 /// 4873 /// \p MachineOutlinerTailCall implies that the function is being created from 4874 /// a sequence of instructions ending in a return. 4875 /// 4876 /// That is, 4877 /// 4878 /// I1 OUTLINED_FUNCTION: 4879 /// I2 --> B OUTLINED_FUNCTION I1 4880 /// RET I2 4881 /// RET 4882 /// 4883 /// * Call construction overhead: 1 (B) 4884 /// * Frame construction overhead: 0 (Return included in sequence) 4885 /// * Requires stack fixups? No 4886 /// 4887 /// \p MachineOutlinerNoLRSave implies that the function should be called using 4888 /// a BL instruction, but doesn't require LR to be saved and restored. This 4889 /// happens when LR is known to be dead. 4890 /// 4891 /// That is, 4892 /// 4893 /// I1 OUTLINED_FUNCTION: 4894 /// I2 --> BL OUTLINED_FUNCTION I1 4895 /// I3 I2 4896 /// I3 4897 /// RET 4898 /// 4899 /// * Call construction overhead: 1 (BL) 4900 /// * Frame construction overhead: 1 (RET) 4901 /// * Requires stack fixups? No 4902 /// 4903 /// \p MachineOutlinerThunk implies that the function is being created from 4904 /// a sequence of instructions ending in a call. The outlined function is 4905 /// called with a BL instruction, and the outlined function tail-calls the 4906 /// original call destination. 4907 /// 4908 /// That is, 4909 /// 4910 /// I1 OUTLINED_FUNCTION: 4911 /// I2 --> BL OUTLINED_FUNCTION I1 4912 /// BL f I2 4913 /// B f 4914 /// * Call construction overhead: 1 (BL) 4915 /// * Frame construction overhead: 0 4916 /// * Requires stack fixups? No 4917 /// 4918 /// \p MachineOutlinerRegSave implies that the function should be called with a 4919 /// save and restore of LR to an available register. This allows us to avoid 4920 /// stack fixups. Note that this outlining variant is compatible with the 4921 /// NoLRSave case. 4922 /// 4923 /// That is, 4924 /// 4925 /// I1 Save LR OUTLINED_FUNCTION: 4926 /// I2 --> BL OUTLINED_FUNCTION I1 4927 /// I3 Restore LR I2 4928 /// I3 4929 /// RET 4930 /// 4931 /// * Call construction overhead: 3 (save + BL + restore) 4932 /// * Frame construction overhead: 1 (ret) 4933 /// * Requires stack fixups? No 4934 enum MachineOutlinerClass { 4935 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 4936 MachineOutlinerTailCall, /// Only emit a branch. 4937 MachineOutlinerNoLRSave, /// Emit a call and return. 4938 MachineOutlinerThunk, /// Emit a call and tail-call. 4939 MachineOutlinerRegSave /// Same as default, but save to a register. 4940 }; 4941 4942 enum MachineOutlinerMBBFlags { 4943 LRUnavailableSomewhere = 0x2, 4944 HasCalls = 0x4 4945 }; 4946 4947 unsigned 4948 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { 4949 MachineFunction *MF = C.getMF(); 4950 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 4951 MF->getSubtarget().getRegisterInfo()); 4952 4953 // Check if there is an available register across the sequence that we can 4954 // use. 4955 for (unsigned Reg : AArch64::GPR64RegClass) { 4956 if (!ARI->isReservedReg(*MF, Reg) && 4957 Reg != AArch64::LR && // LR is not reserved, but don't use it. 4958 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. 4959 Reg != AArch64::X17 && // Ditto for X17. 
        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
      return Reg;
  }

  // No suitable register. Return 0.
  return 0u;
}

outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  unsigned SequenceSize = std::accumulate(
      RepeatedSequenceLocs[0].front(),
      std::next(RepeatedSequenceLocs[0].back()),
      0, [this](unsigned Sum, const MachineInstr &MI) {
        return Sum + getInstSizeInBytes(MI);
      });

  // Compute liveness information for each candidate.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });

  // According to the AArch64 Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Registers x16, x17, (and thus w16, w17)
  // * Condition codes (and thus the NZCV register)
  //
  // Because of this, we can't outline any sequence of instructions where
  // one of these registers is live into/across it. Thus, we need to delete
  // those candidates.
  auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
    LiveRegUnits LRU = C.LRU;
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
            !LRU.available(AArch64::NZCV));
  };

  // Erase every candidate that violates the restrictions above. (It could be
  // true that we have viable candidates, so it's not worth bailing out in
  // the case that, say, 1 out of 20 candidates violates the restrictions.)
  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
                                            RepeatedSequenceLocs.end(),
                                            CantGuaranteeValueAcrossCall),
                             RepeatedSequenceLocs.end());

  // If the sequence is empty, we're done.
  if (RepeatedSequenceLocs.empty())
    return outliner::OutlinedFunction();

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesToCreateFrame = 4;

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
  }

  else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
    // FIXME: Do we need to check if the code after this uses the value of LR?
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerThunk, 4);
  }

  // Make sure that LR isn't live on entry to this candidate. The only
  // instructions that use LR that could possibly appear in a repeated sequence
  // are calls. Therefore, we only have to check and see if LR is dead on entry
  // to (or exit from) some candidate.
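  //
  // For example (registers and the surrounding code are purely illustrative),
  // a repeated sequence such as
  //   ldr w8, [x19]
  //   add w8, w8, #1
  //   str w8, [x19]
  // that appears only at points where LR already holds a dead value can be
  // called with a plain BL and no save or restore of LR at all.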
5046 else if (std::all_of(RepeatedSequenceLocs.begin(), 5047 RepeatedSequenceLocs.end(), 5048 [](outliner::Candidate &C) { 5049 return C.LRU.available(AArch64::LR); 5050 })) { 5051 FrameID = MachineOutlinerNoLRSave; 5052 NumBytesToCreateFrame = 4; 5053 SetCandidateCallInfo(MachineOutlinerNoLRSave, 4); 5054 } 5055 5056 // LR is live, so we need to save it. Decide whether it should be saved to 5057 // the stack, or if it can be saved to a register. 5058 else { 5059 if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), 5060 [this](outliner::Candidate &C) { 5061 return findRegisterToSaveLRTo(C); 5062 })) { 5063 // Every candidate has an available callee-saved register for the save. 5064 // We can save LR to a register. 5065 FrameID = MachineOutlinerRegSave; 5066 NumBytesToCreateFrame = 4; 5067 SetCandidateCallInfo(MachineOutlinerRegSave, 12); 5068 } 5069 5070 else { 5071 // At least one candidate does not have an available callee-saved 5072 // register. We must save LR to the stack. 5073 FrameID = MachineOutlinerDefault; 5074 NumBytesToCreateFrame = 4; 5075 SetCandidateCallInfo(MachineOutlinerDefault, 12); 5076 } 5077 } 5078 5079 // Check if the range contains a call. These require a save + restore of the 5080 // link register. 5081 if (std::any_of(RepeatedSequenceLocs[0].front(), 5082 RepeatedSequenceLocs[0].back(), 5083 [](const MachineInstr &MI) { return MI.isCall(); })) 5084 NumBytesToCreateFrame += 8; // Save + restore the link register. 5085 5086 // Handle the last instruction separately. If this is a tail call, then the 5087 // last instruction is a call. We don't want to save + restore in this case. 5088 // However, it could be possible that the last instruction is a call without 5089 // it being valid to tail call this sequence. We should consider this as well. 5090 else if (FrameID != MachineOutlinerThunk && 5091 FrameID != MachineOutlinerTailCall && 5092 RepeatedSequenceLocs[0].back()->isCall()) 5093 NumBytesToCreateFrame += 8; 5094 5095 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 5096 NumBytesToCreateFrame, FrameID); 5097 } 5098 5099 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( 5100 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 5101 const Function &F = MF.getFunction(); 5102 5103 // Can F be deduplicated by the linker? If it can, don't outline from it. 5104 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 5105 return false; 5106 5107 // Don't outline from functions with section markings; the program could 5108 // expect that all the code is in the named section. 5109 // FIXME: Allow outlining from multiple functions with the same section 5110 // marking. 5111 if (F.hasSection()) 5112 return false; 5113 5114 // Outlining from functions with redzones is unsafe since the outliner may 5115 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't 5116 // outline from it. 5117 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 5118 if (!AFI || AFI->hasRedZone().getValueOr(true)) 5119 return false; 5120 5121 // It's safe to outline from MF. 5122 return true; 5123 } 5124 5125 unsigned 5126 AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { 5127 unsigned Flags = 0x0; 5128 // Check if there's a call inside this MachineBasicBlock. If there is, then 5129 // set a flag. 
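  // (Ranges that contain calls force a save and restore of LR inside the
  // outlined frame, and getOutliningType also consults this flag when deciding
  // whether SP-relative instructions can be outlined without stack fixups.)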
5130 if (std::any_of(MBB.begin(), MBB.end(), 5131 [](MachineInstr &MI) { return MI.isCall(); })) 5132 Flags |= MachineOutlinerMBBFlags::HasCalls; 5133 5134 // Check if LR is available through all of the MBB. If it's not, then set 5135 // a flag. 5136 assert(MBB.getParent()->getRegInfo().tracksLiveness() && 5137 "Suitable Machine Function for outlining must track liveness"); 5138 LiveRegUnits LRU(getRegisterInfo()); 5139 LRU.addLiveOuts(MBB); 5140 5141 std::for_each(MBB.rbegin(), 5142 MBB.rend(), 5143 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); 5144 5145 if (!LRU.available(AArch64::LR)) 5146 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; 5147 5148 return Flags; 5149 } 5150 5151 outliner::InstrType 5152 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, 5153 unsigned Flags) const { 5154 MachineInstr &MI = *MIT; 5155 MachineBasicBlock *MBB = MI.getParent(); 5156 MachineFunction *MF = MBB->getParent(); 5157 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); 5158 5159 // Don't outline LOHs. 5160 if (FuncInfo->getLOHRelated().count(&MI)) 5161 return outliner::InstrType::Illegal; 5162 5163 // Don't allow debug values to impact outlining type. 5164 if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 5165 return outliner::InstrType::Invisible; 5166 5167 // At this point, KILL instructions don't really tell us much so we can go 5168 // ahead and skip over them. 5169 if (MI.isKill()) 5170 return outliner::InstrType::Invisible; 5171 5172 // Is this a terminator for a basic block? 5173 if (MI.isTerminator()) { 5174 5175 // Is this the end of a function? 5176 if (MI.getParent()->succ_empty()) 5177 return outliner::InstrType::Legal; 5178 5179 // It's not, so don't outline it. 5180 return outliner::InstrType::Illegal; 5181 } 5182 5183 // Make sure none of the operands are un-outlinable. 5184 for (const MachineOperand &MOP : MI.operands()) { 5185 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 5186 MOP.isTargetIndex()) 5187 return outliner::InstrType::Illegal; 5188 5189 // If it uses LR or W30 explicitly, then don't touch it. 5190 if (MOP.isReg() && !MOP.isImplicit() && 5191 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30)) 5192 return outliner::InstrType::Illegal; 5193 } 5194 5195 // Special cases for instructions that can always be outlined, but will fail 5196 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always 5197 // be outlined because they don't require a *specific* value to be in LR. 5198 if (MI.getOpcode() == AArch64::ADRP) 5199 return outliner::InstrType::Legal; 5200 5201 // If MI is a call we might be able to outline it. We don't want to outline 5202 // any calls that rely on the position of items on the stack. When we outline 5203 // something containing a call, we have to emit a save and restore of LR in 5204 // the outlined function. Currently, this always happens by saving LR to the 5205 // stack. Thus, if we outline, say, half the parameters for a function call 5206 // plus the call, then we'll break the callee's expectations for the layout 5207 // of the stack. 5208 // 5209 // FIXME: Allow calls to functions which construct a stack frame, as long 5210 // as they don't access arguments on the stack. 5211 // FIXME: Figure out some way to analyze functions defined in other modules. 5212 // We should be able to compute the memory usage based on the IR calling 5213 // convention, even if we can't see the definition. 
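  //
  // A sketch of the problem (the instructions and callee name are
  // hypothetical): suppose the caller stores an outgoing stack argument and
  // the call itself is outlined:
  //   str x0, [sp]
  //   bl take_stack_arg
  // The save of LR emitted for the outlined call sits between the store and
  // the call, so the SP-relative slot the callee reads is no longer the one
  // the caller filled in.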
5214 if (MI.isCall()) { 5215 // Get the function associated with the call. Look at each operand and find 5216 // the one that represents the callee and get its name. 5217 const Function *Callee = nullptr; 5218 for (const MachineOperand &MOP : MI.operands()) { 5219 if (MOP.isGlobal()) { 5220 Callee = dyn_cast<Function>(MOP.getGlobal()); 5221 break; 5222 } 5223 } 5224 5225 // Never outline calls to mcount. There isn't any rule that would require 5226 // this, but the Linux kernel's "ftrace" feature depends on it. 5227 if (Callee && Callee->getName() == "\01_mcount") 5228 return outliner::InstrType::Illegal; 5229 5230 // If we don't know anything about the callee, assume it depends on the 5231 // stack layout of the caller. In that case, it's only legal to outline 5232 // as a tail-call. Whitelist the call instructions we know about so we 5233 // don't get unexpected results with call pseudo-instructions. 5234 auto UnknownCallOutlineType = outliner::InstrType::Illegal; 5235 if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL) 5236 UnknownCallOutlineType = outliner::InstrType::LegalTerminator; 5237 5238 if (!Callee) 5239 return UnknownCallOutlineType; 5240 5241 // We have a function we have information about. Check if it's something 5242 // we can safely outline. 5243 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); 5244 5245 // We don't know what's going on with the callee at all. Don't touch it. 5246 if (!CalleeMF) 5247 return UnknownCallOutlineType; 5248 5249 // Check if we know anything about the callee saves on the function. If we 5250 // don't, then don't touch it, since that implies that we haven't 5251 // computed anything about its stack frame yet. 5252 MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); 5253 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || 5254 MFI.getNumObjects() > 0) 5255 return UnknownCallOutlineType; 5256 5257 // At this point, we can say that CalleeMF ought not to pass anything on the 5258 // stack. Therefore, we can outline it. 5259 return outliner::InstrType::Legal; 5260 } 5261 5262 // Don't outline positions. 5263 if (MI.isPosition()) 5264 return outliner::InstrType::Illegal; 5265 5266 // Don't touch the link register or W30. 5267 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) || 5268 MI.modifiesRegister(AArch64::W30, &getRegisterInfo())) 5269 return outliner::InstrType::Illegal; 5270 5271 // Does this use the stack? 5272 if (MI.modifiesRegister(AArch64::SP, &RI) || 5273 MI.readsRegister(AArch64::SP, &RI)) { 5274 // MightNeedStackFixUp is false -- that is, there is no chance that any 5275 // outlined candidate from this range could require stack fixups -- precisely when both 5276 // * LR is available in the range (No save/restore around call) 5277 // * The range doesn't include calls (No save/restore in outlined frame) 5278 // are true. 5279 // FIXME: This is very restrictive; the flags check the whole block, 5280 // not just the bit we will try to outline. 5281 bool MightNeedStackFixUp = 5282 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | 5283 MachineOutlinerMBBFlags::HasCalls)); 5284 5285 // If this instruction is in a range where it *never* needs to be fixed 5286 // up, then we can *always* outline it. This is true even if it's not 5287 // possible to fix that instruction up. 5288 // 5289 // Why? Consider two equivalent instructions I1, I2 where both I1 and I2 5290 // use SP. Suppose that I1 sits within a range that definitely doesn't 5291 // need stack fixups, while I2 sits in a range that does.
5292 // 5293 // First, I1 can be outlined as long as we *never* fix up the stack in 5294 // any sequence containing it. I1 is already a safe instruction in the 5295 // original program, so as long as we don't modify it we're good to go. 5296 // So this leaves us with showing that outlining I2 won't break our 5297 // program. 5298 // 5299 // Suppose I1 and I2 belong to equivalent candidate sequences. When we 5300 // look at I2, we need to see if it can be fixed up. Suppose I2 (and 5301 // thus I1) cannot be fixed up. Then I2 will be assigned a unique 5302 // integer label; thus, I2 cannot belong to any candidate sequence (a 5303 // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up 5304 // as well, so we're good. Thus, I1 is always safe to outline. 5305 // 5306 // This gives us two things: first off, it buys us some more instructions 5307 // for our search space by deeming stack instructions illegal only when 5308 // they can't be fixed up AND we might have to fix them up. Second off, 5309 // this allows us to catch tricky instructions like, say, 5310 // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might 5311 // be paired with later SUBXris, which might *not* end up being outlined. 5312 // If we mess with the stack to save something and an ADDXri then messes 5313 // with it *after*, we aren't going to restore the right thing from the 5314 // stack if we don't also outline the corresponding SUBXri first. ADDXris and 5315 // SUBXris are extremely common in prologue/epilogue code, so supporting 5316 // them in the outliner can be a pretty big win! 5317 if (!MightNeedStackFixUp) 5318 return outliner::InstrType::Legal; 5319 5320 // Any modification of SP will break our code to save/restore LR. 5321 // FIXME: We could handle some instructions which add a constant offset to 5322 // SP, with a bit more work. 5323 if (MI.modifiesRegister(AArch64::SP, &RI)) 5324 return outliner::InstrType::Illegal; 5325 5326 // At this point, we have a stack instruction that we might need to fix 5327 // up. We'll handle it if it's a load or store. 5328 if (MI.mayLoadOrStore()) { 5329 unsigned Base; // Filled with the base register of MI. 5330 int64_t Offset; // Filled with the offset of MI. 5331 unsigned DummyWidth; 5332 5333 // Does it allow us to offset the base register and is the base SP? 5334 if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) || 5335 Base != AArch64::SP) 5336 return outliner::InstrType::Illegal; 5337 5338 // Find the minimum/maximum offset for this instruction and check if 5339 // fixing it up would be in range. 5340 int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction. 5341 unsigned Scale; // The scale to multiply the offsets by. 5342 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); 5343 5344 // TODO: We should really test what happens if an instruction overflows. 5345 // This is tricky to test with IR tests, but when the outliner is moved 5346 // to a MIR test, it really ought to be checked. 5347 Offset += 16; // Update the offset to what it would be if we outlined. 5348 if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale) 5349 return outliner::InstrType::Illegal; 5350 5351 // It's in range, so we can outline it. 5352 return outliner::InstrType::Legal; 5353 } 5354 5355 // FIXME: Add handling for instructions like "add x0, sp, #8". 5356 5357 // We can't fix it up, so don't outline it.
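// (As a hedged example of what that missing handling would have to do: with
// the 16-byte LR spill in the outlined frame, "add x0, sp, #8" would need to
// be rewritten to "add x0, sp, #24" to keep producing the same address, just
// as the load/store offsets above are bumped by 16. Until such rewriting
// exists, these instructions are rejected here.)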
5358 return outliner::InstrType::Illegal; 5359 } 5360 5361 return outliner::InstrType::Legal; 5362 } 5363 5364 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { 5365 for (MachineInstr &MI : MBB) { 5366 unsigned Base, Width; 5367 int64_t Offset; 5368 5369 // Is this a load or store with an immediate offset with SP as the base? 5370 if (!MI.mayLoadOrStore() || 5371 !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) || 5372 Base != AArch64::SP) 5373 continue; 5374 5375 // It is, so we have to fix it up. 5376 unsigned Scale; 5377 int64_t Dummy1, Dummy2; 5378 5379 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI); 5380 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!"); 5381 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2); 5382 assert(Scale != 0 && "Unexpected opcode!"); 5383 5384 // We've pushed the return address to the stack, so add 16 to the offset. 5385 // This is safe, since we already checked if it would overflow when we 5386 // checked if this instruction was legal to outline. 5387 int64_t NewImm = (Offset + 16) / Scale; 5388 StackOffsetOperand.setImm(NewImm); 5389 } 5390 } 5391 5392 void AArch64InstrInfo::buildOutlinedFrame( 5393 MachineBasicBlock &MBB, MachineFunction &MF, 5394 const outliner::OutlinedFunction &OF) const { 5395 // For thunk outlining, rewrite the last instruction from a call to a 5396 // tail-call. 5397 if (OF.FrameConstructionID == MachineOutlinerThunk) { 5398 MachineInstr *Call = &*--MBB.instr_end(); 5399 unsigned TailOpcode; 5400 if (Call->getOpcode() == AArch64::BL) { 5401 TailOpcode = AArch64::TCRETURNdi; 5402 } else { 5403 assert(Call->getOpcode() == AArch64::BLR); 5404 TailOpcode = AArch64::TCRETURNri; 5405 } 5406 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) 5407 .add(Call->getOperand(0)) 5408 .addImm(0); 5409 MBB.insert(MBB.end(), TC); 5410 Call->eraseFromParent(); 5411 } 5412 5413 // Is there a call in the outlined range? 5414 auto IsNonTailCall = [](MachineInstr &MI) { 5415 return MI.isCall() && !MI.isReturn(); 5416 }; 5417 if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { 5418 // Fix up the instructions in the range, since we're going to modify the 5419 // stack. 5420 assert(OF.FrameConstructionID != MachineOutlinerDefault && 5421 "Can only fix up stack references once"); 5422 fixupPostOutline(MBB); 5423 5424 // LR has to be a live in so that we can save it. 5425 MBB.addLiveIn(AArch64::LR); 5426 5427 MachineBasicBlock::iterator It = MBB.begin(); 5428 MachineBasicBlock::iterator Et = MBB.end(); 5429 5430 if (OF.FrameConstructionID == MachineOutlinerTailCall || 5431 OF.FrameConstructionID == MachineOutlinerThunk) 5432 Et = std::prev(MBB.end()); 5433 5434 // Insert a save before the outlined region 5435 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 5436 .addReg(AArch64::SP, RegState::Define) 5437 .addReg(AArch64::LR) 5438 .addReg(AArch64::SP) 5439 .addImm(-16); 5440 It = MBB.insert(It, STRXpre); 5441 5442 const TargetSubtargetInfo &STI = MF.getSubtarget(); 5443 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 5444 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); 5445 5446 // Add a CFI saying the stack was moved 16 B down. 
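// (In the emitted assembly, this entry and the LR entry below should come out
// roughly as ".cfi_def_cfa_offset 16" and ".cfi_offset w30, 16"; a hedged
// reading of what these MCCFIInstruction helpers produce, not something this
// code asserts.)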
5447 int64_t StackPosEntry = 5448 MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16)); 5449 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 5450 .addCFIIndex(StackPosEntry) 5451 .setMIFlags(MachineInstr::FrameSetup); 5452 5453 // Add a CFI saying that the LR that we want to find is now 16 B higher than 5454 // before. 5455 int64_t LRPosEntry = 5456 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16)); 5457 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 5458 .addCFIIndex(LRPosEntry) 5459 .setMIFlags(MachineInstr::FrameSetup); 5460 5461 // Insert a restore before the terminator for the function. 5462 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 5463 .addReg(AArch64::SP, RegState::Define) 5464 .addReg(AArch64::LR, RegState::Define) 5465 .addReg(AArch64::SP) 5466 .addImm(16); 5467 Et = MBB.insert(Et, LDRXpost); 5468 } 5469 5470 // If this is a tail call outlined function, then there's already a return. 5471 if (OF.FrameConstructionID == MachineOutlinerTailCall || 5472 OF.FrameConstructionID == MachineOutlinerThunk) 5473 return; 5474 5475 // It's not a tail call, so we have to insert the return ourselves. 5476 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) 5477 .addReg(AArch64::LR, RegState::Undef); 5478 MBB.insert(MBB.end(), ret); 5479 5480 // Did we have to modify the stack by saving the link register? 5481 if (OF.FrameConstructionID != MachineOutlinerDefault) 5482 return; 5483 5484 // We modified the stack. 5485 // Walk over the basic block and fix up all the stack accesses. 5486 fixupPostOutline(MBB); 5487 } 5488 5489 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( 5490 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 5491 MachineFunction &MF, const outliner::Candidate &C) const { 5492 5493 // Are we tail calling? 5494 if (C.CallConstructionID == MachineOutlinerTailCall) { 5495 // If yes, then we can just branch to the label. 5496 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) 5497 .addGlobalAddress(M.getNamedValue(MF.getName())) 5498 .addImm(0)); 5499 return It; 5500 } 5501 5502 // Are we saving the link register? 5503 if (C.CallConstructionID == MachineOutlinerNoLRSave || 5504 C.CallConstructionID == MachineOutlinerThunk) { 5505 // No, so just insert the call. 5506 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 5507 .addGlobalAddress(M.getNamedValue(MF.getName()))); 5508 return It; 5509 } 5510 5511 // We want to return the spot where we inserted the call. 5512 MachineBasicBlock::iterator CallPt; 5513 5514 // Instructions for saving and restoring LR around the call instruction we're 5515 // going to insert. 5516 MachineInstr *Save; 5517 MachineInstr *Restore; 5518 // Can we save to a register? 5519 if (C.CallConstructionID == MachineOutlinerRegSave) { 5520 // FIXME: This logic should be sunk into a target-specific interface so that 5521 // we don't have to recompute the register. 5522 unsigned Reg = findRegisterToSaveLRTo(C); 5523 assert(Reg != 0 && "No callee-saved register available?"); 5524 5525 // Save and restore LR from that register. 5526 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) 5527 .addReg(AArch64::XZR) 5528 .addReg(AArch64::LR) 5529 .addImm(0); 5530 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) 5531 .addReg(AArch64::XZR) 5532 .addReg(Reg) 5533 .addImm(0); 5534 } else { 5535 // We have the default case. Save and restore from SP. 
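// A hedged sketch of the resulting call site in this default case (assuming
// the usual OUTLINED_FUNCTION_N naming; not meant as exact MC output):
//   str x30, [sp, #-16]!
//   bl  OUTLINED_FUNCTION_N
//   ldr x30, [sp], #16
// By contrast, the register-save case above is just a "mov x<reg>, x30" /
// "mov x30, x<reg>" pair around the bl (ORRXrs with XZR is the canonical
// AArch64 register move).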
5536 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 5537 .addReg(AArch64::SP, RegState::Define) 5538 .addReg(AArch64::LR) 5539 .addReg(AArch64::SP) 5540 .addImm(-16); 5541 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 5542 .addReg(AArch64::SP, RegState::Define) 5543 .addReg(AArch64::LR, RegState::Define) 5544 .addReg(AArch64::SP) 5545 .addImm(16); 5546 } 5547 5548 It = MBB.insert(It, Save); 5549 It++; 5550 5551 // Insert the call. 5552 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 5553 .addGlobalAddress(M.getNamedValue(MF.getName()))); 5554 CallPt = It; 5555 It++; 5556 5557 It = MBB.insert(It, Restore); 5558 return CallPt; 5559 } 5560 5561 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( 5562 MachineFunction &MF) const { 5563 return MF.getFunction().optForMinSize(); 5564 } 5565