//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

// Branch analysis.
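// Summary of the Cond encoding produced by parseCondBranch() above and
// consumed by ReverseBranchCondition()/instantiateCondBranch()/insertSelect():
//   1 operand : [cond-code imm]                        for Bcc
//   3 operands: [-1, branch opcode, reg]               for CBZ/CBNZ
//   4 operands: [-1, branch opcode, reg, bit-number]   for TBZ/TBNZ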
96 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, 97 MachineBasicBlock *&TBB, 98 MachineBasicBlock *&FBB, 99 SmallVectorImpl<MachineOperand> &Cond, 100 bool AllowModify) const { 101 // If the block has no terminators, it just falls into the block after it. 102 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 103 if (I == MBB.end()) 104 return false; 105 106 if (!isUnpredicatedTerminator(*I)) 107 return false; 108 109 // Get the last instruction in the block. 110 MachineInstr *LastInst = &*I; 111 112 // If there is only one terminator instruction, process it. 113 unsigned LastOpc = LastInst->getOpcode(); 114 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 115 if (isUncondBranchOpcode(LastOpc)) { 116 TBB = LastInst->getOperand(0).getMBB(); 117 return false; 118 } 119 if (isCondBranchOpcode(LastOpc)) { 120 // Block ends with fall-through condbranch. 121 parseCondBranch(LastInst, TBB, Cond); 122 return false; 123 } 124 return true; // Can't handle indirect branch. 125 } 126 127 // Get the instruction before it if it is a terminator. 128 MachineInstr *SecondLastInst = &*I; 129 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 130 131 // If AllowModify is true and the block ends with two or more unconditional 132 // branches, delete all but the first unconditional branch. 133 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 134 while (isUncondBranchOpcode(SecondLastOpc)) { 135 LastInst->eraseFromParent(); 136 LastInst = SecondLastInst; 137 LastOpc = LastInst->getOpcode(); 138 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 139 // Return now the only terminator is an unconditional branch. 140 TBB = LastInst->getOperand(0).getMBB(); 141 return false; 142 } else { 143 SecondLastInst = &*I; 144 SecondLastOpc = SecondLastInst->getOpcode(); 145 } 146 } 147 } 148 149 // If there are three terminators, we don't know what sort of block this is. 150 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) 151 return true; 152 153 // If the block ends with a B and a Bcc, handle it. 154 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 155 parseCondBranch(SecondLastInst, TBB, Cond); 156 FBB = LastInst->getOperand(0).getMBB(); 157 return false; 158 } 159 160 // If the block ends with two unconditional branches, handle it. The second 161 // one is not executed, so remove it. 162 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 163 TBB = SecondLastInst->getOperand(0).getMBB(); 164 I = LastInst; 165 if (AllowModify) 166 I->eraseFromParent(); 167 return false; 168 } 169 170 // ...likewise if it ends with an indirect branch followed by an unconditional 171 // branch. 172 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 173 I = LastInst; 174 if (AllowModify) 175 I->eraseFromParent(); 176 return true; 177 } 178 179 // Otherwise, can't handle this. 
180 return true; 181 } 182 183 bool AArch64InstrInfo::ReverseBranchCondition( 184 SmallVectorImpl<MachineOperand> &Cond) const { 185 if (Cond[0].getImm() != -1) { 186 // Regular Bcc 187 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 188 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); 189 } else { 190 // Folded compare-and-branch 191 switch (Cond[1].getImm()) { 192 default: 193 llvm_unreachable("Unknown conditional branch!"); 194 case AArch64::CBZW: 195 Cond[1].setImm(AArch64::CBNZW); 196 break; 197 case AArch64::CBNZW: 198 Cond[1].setImm(AArch64::CBZW); 199 break; 200 case AArch64::CBZX: 201 Cond[1].setImm(AArch64::CBNZX); 202 break; 203 case AArch64::CBNZX: 204 Cond[1].setImm(AArch64::CBZX); 205 break; 206 case AArch64::TBZW: 207 Cond[1].setImm(AArch64::TBNZW); 208 break; 209 case AArch64::TBNZW: 210 Cond[1].setImm(AArch64::TBZW); 211 break; 212 case AArch64::TBZX: 213 Cond[1].setImm(AArch64::TBNZX); 214 break; 215 case AArch64::TBNZX: 216 Cond[1].setImm(AArch64::TBZX); 217 break; 218 } 219 } 220 221 return false; 222 } 223 224 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 225 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 226 if (I == MBB.end()) 227 return 0; 228 229 if (!isUncondBranchOpcode(I->getOpcode()) && 230 !isCondBranchOpcode(I->getOpcode())) 231 return 0; 232 233 // Remove the branch. 234 I->eraseFromParent(); 235 236 I = MBB.end(); 237 238 if (I == MBB.begin()) 239 return 1; 240 --I; 241 if (!isCondBranchOpcode(I->getOpcode())) 242 return 1; 243 244 // Remove the branch. 245 I->eraseFromParent(); 246 return 2; 247 } 248 249 void AArch64InstrInfo::instantiateCondBranch( 250 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, 251 ArrayRef<MachineOperand> Cond) const { 252 if (Cond[0].getImm() != -1) { 253 // Regular Bcc 254 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); 255 } else { 256 // Folded compare-and-branch 257 // Note that we use addOperand instead of addReg to keep the flags. 258 const MachineInstrBuilder MIB = 259 BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]); 260 if (Cond.size() > 3) 261 MIB.addImm(Cond[3].getImm()); 262 MIB.addMBB(TBB); 263 } 264 } 265 266 unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, 267 MachineBasicBlock *TBB, 268 MachineBasicBlock *FBB, 269 ArrayRef<MachineOperand> Cond, 270 const DebugLoc &DL) const { 271 // Shouldn't be a fall through. 272 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 273 274 if (!FBB) { 275 if (Cond.empty()) // Unconditional branch? 276 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); 277 else 278 instantiateCondBranch(MBB, DL, TBB, Cond); 279 return 1; 280 } 281 282 // Two-way conditional branch. 283 instantiateCondBranch(MBB, DL, TBB, Cond); 284 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); 285 return 2; 286 } 287 288 // Find the original register that VReg is copied from. 289 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { 290 while (TargetRegisterInfo::isVirtualRegister(VReg)) { 291 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 292 if (!DefMI->isFullCopy()) 293 return VReg; 294 VReg = DefMI->getOperand(1).getReg(); 295 } 296 return VReg; 297 } 298 299 // Determine if VReg is defined by an instruction that can be folded into a 300 // csel instruction. If so, return the folded opcode, and the replacement 301 // register. 
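// The patterns recognized (see the cases below) are:
//   add Rd, Rn, #1       -> CSINC (conditional increment of Rn)
//   orn Rd, zr, Rm       -> CSINV (conditional invert of Rm)
//   sub Rd, zr, Rm       -> CSNEG (conditional negate of Rm)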
302 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, 303 unsigned *NewVReg = nullptr) { 304 VReg = removeCopies(MRI, VReg); 305 if (!TargetRegisterInfo::isVirtualRegister(VReg)) 306 return 0; 307 308 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); 309 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 310 unsigned Opc = 0; 311 unsigned SrcOpNum = 0; 312 switch (DefMI->getOpcode()) { 313 case AArch64::ADDSXri: 314 case AArch64::ADDSWri: 315 // if NZCV is used, do not fold. 316 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 317 return 0; 318 // fall-through to ADDXri and ADDWri. 319 case AArch64::ADDXri: 320 case AArch64::ADDWri: 321 // add x, 1 -> csinc. 322 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || 323 DefMI->getOperand(3).getImm() != 0) 324 return 0; 325 SrcOpNum = 1; 326 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; 327 break; 328 329 case AArch64::ORNXrr: 330 case AArch64::ORNWrr: { 331 // not x -> csinv, represented as orn dst, xzr, src. 332 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 333 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 334 return 0; 335 SrcOpNum = 2; 336 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; 337 break; 338 } 339 340 case AArch64::SUBSXrr: 341 case AArch64::SUBSWrr: 342 // if NZCV is used, do not fold. 343 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 344 return 0; 345 // fall-through to SUBXrr and SUBWrr. 346 case AArch64::SUBXrr: 347 case AArch64::SUBWrr: { 348 // neg x -> csneg, represented as sub dst, xzr, src. 349 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 350 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 351 return 0; 352 SrcOpNum = 2; 353 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr; 354 break; 355 } 356 default: 357 return 0; 358 } 359 assert(Opc && SrcOpNum && "Missing parameters"); 360 361 if (NewVReg) 362 *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); 363 return Opc; 364 } 365 366 bool AArch64InstrInfo::canInsertSelect( 367 const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, 368 unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, 369 int &FalseCycles) const { 370 // Check register classes. 371 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 372 const TargetRegisterClass *RC = 373 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 374 if (!RC) 375 return false; 376 377 // Expanding cbz/tbz requires an extra cycle of latency on the condition. 378 unsigned ExtraCondLat = Cond.size() != 1; 379 380 // GPRs are handled by csel. 381 // FIXME: Fold in x+1, -x, and ~x when applicable. 382 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || 383 AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 384 // Single-cycle csel, csinc, csinv, and csneg. 385 CondCycles = 1 + ExtraCondLat; 386 TrueCycles = FalseCycles = 1; 387 if (canFoldIntoCSel(MRI, TrueReg)) 388 TrueCycles = 0; 389 else if (canFoldIntoCSel(MRI, FalseReg)) 390 FalseCycles = 0; 391 return true; 392 } 393 394 // Scalar floating point is handled by fcsel. 395 // FIXME: Form fabs, fmin, and fmax when applicable. 396 if (AArch64::FPR64RegClass.hasSubClassEq(RC) || 397 AArch64::FPR32RegClass.hasSubClassEq(RC)) { 398 CondCycles = 5 + ExtraCondLat; 399 TrueCycles = FalseCycles = 2; 400 return true; 401 } 402 403 // Can't do vectors. 
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = 0;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = 1;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = 0;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = 1;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
      CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to move #0 to float registers if the subtarget has
  // ZeroCycleZeroing feature.
629 case AArch64::FMOVS0: 630 case AArch64::FMOVD0: 631 return Subtarget.hasZeroCycleZeroing(); 632 } 633 634 llvm_unreachable("Unknown opcode to check as cheap as a move!"); 635 } 636 637 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 638 unsigned &SrcReg, unsigned &DstReg, 639 unsigned &SubIdx) const { 640 switch (MI.getOpcode()) { 641 default: 642 return false; 643 case AArch64::SBFMXri: // aka sxtw 644 case AArch64::UBFMXri: // aka uxtw 645 // Check for the 32 -> 64 bit extension case, these instructions can do 646 // much more. 647 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) 648 return false; 649 // This is a signed or unsigned 32 -> 64 bit extension. 650 SrcReg = MI.getOperand(1).getReg(); 651 DstReg = MI.getOperand(0).getReg(); 652 SubIdx = AArch64::sub_32; 653 return true; 654 } 655 } 656 657 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( 658 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { 659 const TargetRegisterInfo *TRI = &getRegisterInfo(); 660 unsigned BaseRegA = 0, BaseRegB = 0; 661 int64_t OffsetA = 0, OffsetB = 0; 662 unsigned WidthA = 0, WidthB = 0; 663 664 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 665 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 666 667 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 668 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 669 return false; 670 671 // Retrieve the base register, offset from the base register and width. Width 672 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If 673 // base registers are identical, and the offset of a lower memory access + 674 // the width doesn't overlap the offset of a higher memory access, 675 // then the memory accesses are different. 676 if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && 677 getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { 678 if (BaseRegA == BaseRegB) { 679 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 680 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 681 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 682 if (LowOffset + LowWidth <= HighOffset) 683 return true; 684 } 685 } 686 return false; 687 } 688 689 /// analyzeCompare - For a comparison instruction, return the source registers 690 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. 691 /// Return true if the comparison instruction can be analyzed. 692 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 693 unsigned &SrcReg2, int &CmpMask, 694 int &CmpValue) const { 695 switch (MI.getOpcode()) { 696 default: 697 break; 698 case AArch64::SUBSWrr: 699 case AArch64::SUBSWrs: 700 case AArch64::SUBSWrx: 701 case AArch64::SUBSXrr: 702 case AArch64::SUBSXrs: 703 case AArch64::SUBSXrx: 704 case AArch64::ADDSWrr: 705 case AArch64::ADDSWrs: 706 case AArch64::ADDSWrx: 707 case AArch64::ADDSXrr: 708 case AArch64::ADDSXrs: 709 case AArch64::ADDSXrx: 710 // Replace SUBSWrr with SUBWrr if NZCV is not used. 
711 SrcReg = MI.getOperand(1).getReg(); 712 SrcReg2 = MI.getOperand(2).getReg(); 713 CmpMask = ~0; 714 CmpValue = 0; 715 return true; 716 case AArch64::SUBSWri: 717 case AArch64::ADDSWri: 718 case AArch64::SUBSXri: 719 case AArch64::ADDSXri: 720 SrcReg = MI.getOperand(1).getReg(); 721 SrcReg2 = 0; 722 CmpMask = ~0; 723 // FIXME: In order to convert CmpValue to 0 or 1 724 CmpValue = MI.getOperand(2).getImm() != 0; 725 return true; 726 case AArch64::ANDSWri: 727 case AArch64::ANDSXri: 728 // ANDS does not use the same encoding scheme as the others xxxS 729 // instructions. 730 SrcReg = MI.getOperand(1).getReg(); 731 SrcReg2 = 0; 732 CmpMask = ~0; 733 // FIXME:The return val type of decodeLogicalImmediate is uint64_t, 734 // while the type of CmpValue is int. When converting uint64_t to int, 735 // the high 32 bits of uint64_t will be lost. 736 // In fact it causes a bug in spec2006-483.xalancbmk 737 // CmpValue is only used to compare with zero in OptimizeCompareInstr 738 CmpValue = AArch64_AM::decodeLogicalImmediate( 739 MI.getOperand(2).getImm(), 740 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; 741 return true; 742 } 743 744 return false; 745 } 746 747 static bool UpdateOperandRegClass(MachineInstr &Instr) { 748 MachineBasicBlock *MBB = Instr.getParent(); 749 assert(MBB && "Can't get MachineBasicBlock here"); 750 MachineFunction *MF = MBB->getParent(); 751 assert(MF && "Can't get MachineFunction here"); 752 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 753 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 754 MachineRegisterInfo *MRI = &MF->getRegInfo(); 755 756 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx; 757 ++OpIdx) { 758 MachineOperand &MO = Instr.getOperand(OpIdx); 759 const TargetRegisterClass *OpRegCstraints = 760 Instr.getRegClassConstraint(OpIdx, TII, TRI); 761 762 // If there's no constraint, there's nothing to do. 763 if (!OpRegCstraints) 764 continue; 765 // If the operand is a frame index, there's nothing to do here. 766 // A frame index operand will resolve correctly during PEI. 767 if (MO.isFI()) 768 continue; 769 770 assert(MO.isReg() && 771 "Operand has register constraints without being a register!"); 772 773 unsigned Reg = MO.getReg(); 774 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 775 if (!OpRegCstraints->contains(Reg)) 776 return false; 777 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && 778 !MRI->constrainRegClass(Reg, OpRegCstraints)) 779 return false; 780 } 781 782 return true; 783 } 784 785 /// \brief Return the opcode that does not set flags when possible - otherwise 786 /// return the original opcode. The caller is responsible to do the actual 787 /// substitution and legality checking. 788 static unsigned convertFlagSettingOpcode(const MachineInstr &MI) { 789 // Don't convert all compare instructions, because for some the zero register 790 // encoding becomes the sp register. 791 bool MIDefinesZeroReg = false; 792 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR)) 793 MIDefinesZeroReg = true; 794 795 switch (MI.getOpcode()) { 796 default: 797 return MI.getOpcode(); 798 case AArch64::ADDSWrr: 799 return AArch64::ADDWrr; 800 case AArch64::ADDSWri: 801 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri; 802 case AArch64::ADDSWrs: 803 return MIDefinesZeroReg ? 
AArch64::ADDSWrs : AArch64::ADDWrs; 804 case AArch64::ADDSWrx: 805 return AArch64::ADDWrx; 806 case AArch64::ADDSXrr: 807 return AArch64::ADDXrr; 808 case AArch64::ADDSXri: 809 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri; 810 case AArch64::ADDSXrs: 811 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs; 812 case AArch64::ADDSXrx: 813 return AArch64::ADDXrx; 814 case AArch64::SUBSWrr: 815 return AArch64::SUBWrr; 816 case AArch64::SUBSWri: 817 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri; 818 case AArch64::SUBSWrs: 819 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs; 820 case AArch64::SUBSWrx: 821 return AArch64::SUBWrx; 822 case AArch64::SUBSXrr: 823 return AArch64::SUBXrr; 824 case AArch64::SUBSXri: 825 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri; 826 case AArch64::SUBSXrs: 827 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs; 828 case AArch64::SUBSXrx: 829 return AArch64::SUBXrx; 830 } 831 } 832 833 enum AccessKind { 834 AK_Write = 0x01, 835 AK_Read = 0x10, 836 AK_All = 0x11 837 }; 838 839 /// True when condition flags are accessed (either by writing or reading) 840 /// on the instruction trace starting at From and ending at To. 841 /// 842 /// Note: If From and To are from different blocks it's assumed CC are accessed 843 /// on the path. 844 static bool areCFlagsAccessedBetweenInstrs( 845 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, 846 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) { 847 // Early exit if To is at the beginning of the BB. 848 if (To == To->getParent()->begin()) 849 return true; 850 851 // Check whether the instructions are in the same basic block 852 // If not, assume the condition flags might get modified somewhere. 853 if (To->getParent() != From->getParent()) 854 return true; 855 856 // From must be above To. 857 assert(std::find_if(MachineBasicBlock::reverse_iterator(To), 858 To->getParent()->rend(), [From](MachineInstr &MI) { 859 return MachineBasicBlock::iterator(MI) == From; 860 }) != To->getParent()->rend()); 861 862 // We iterate backward starting \p To until we hit \p From. 863 for (--To; To != From; --To) { 864 const MachineInstr &Instr = *To; 865 866 if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) || 867 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) 868 return true; 869 } 870 return false; 871 } 872 873 /// Try to optimize a compare instruction. A compare instruction is an 874 /// instruction which produces AArch64::NZCV. It can be truly compare instruction 875 /// when there are no uses of its destination register. 876 /// 877 /// The following steps are tried in order: 878 /// 1. Convert CmpInstr into an unconditional version. 879 /// 2. Remove CmpInstr if above there is an instruction producing a needed 880 /// condition code or an instruction which can be converted into such an instruction. 881 /// Only comparison with zero is supported. 882 bool AArch64InstrInfo::optimizeCompareInstr( 883 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, 884 int CmpValue, const MachineRegisterInfo *MRI) const { 885 assert(CmpInstr.getParent()); 886 assert(MRI); 887 888 // Replace SUBSWrr with SUBWrr if NZCV is not used. 
889 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true); 890 if (DeadNZCVIdx != -1) { 891 if (CmpInstr.definesRegister(AArch64::WZR) || 892 CmpInstr.definesRegister(AArch64::XZR)) { 893 CmpInstr.eraseFromParent(); 894 return true; 895 } 896 unsigned Opc = CmpInstr.getOpcode(); 897 unsigned NewOpc = convertFlagSettingOpcode(CmpInstr); 898 if (NewOpc == Opc) 899 return false; 900 const MCInstrDesc &MCID = get(NewOpc); 901 CmpInstr.setDesc(MCID); 902 CmpInstr.RemoveOperand(DeadNZCVIdx); 903 bool succeeded = UpdateOperandRegClass(CmpInstr); 904 (void)succeeded; 905 assert(succeeded && "Some operands reg class are incompatible!"); 906 return true; 907 } 908 909 // Continue only if we have a "ri" where immediate is zero. 910 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare 911 // function. 912 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); 913 if (CmpValue != 0 || SrcReg2 != 0) 914 return false; 915 916 // CmpInstr is a Compare instruction if destination register is not used. 917 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) 918 return false; 919 920 return substituteCmpToZero(CmpInstr, SrcReg, MRI); 921 } 922 923 /// Get opcode of S version of Instr. 924 /// If Instr is S version its opcode is returned. 925 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version 926 /// or we are not interested in it. 927 static unsigned sForm(MachineInstr &Instr) { 928 switch (Instr.getOpcode()) { 929 default: 930 return AArch64::INSTRUCTION_LIST_END; 931 932 case AArch64::ADDSWrr: 933 case AArch64::ADDSWri: 934 case AArch64::ADDSXrr: 935 case AArch64::ADDSXri: 936 case AArch64::SUBSWrr: 937 case AArch64::SUBSWri: 938 case AArch64::SUBSXrr: 939 case AArch64::SUBSXri: 940 return Instr.getOpcode();; 941 942 case AArch64::ADDWrr: return AArch64::ADDSWrr; 943 case AArch64::ADDWri: return AArch64::ADDSWri; 944 case AArch64::ADDXrr: return AArch64::ADDSXrr; 945 case AArch64::ADDXri: return AArch64::ADDSXri; 946 case AArch64::ADCWr: return AArch64::ADCSWr; 947 case AArch64::ADCXr: return AArch64::ADCSXr; 948 case AArch64::SUBWrr: return AArch64::SUBSWrr; 949 case AArch64::SUBWri: return AArch64::SUBSWri; 950 case AArch64::SUBXrr: return AArch64::SUBSXrr; 951 case AArch64::SUBXri: return AArch64::SUBSXri; 952 case AArch64::SBCWr: return AArch64::SBCSWr; 953 case AArch64::SBCXr: return AArch64::SBCSXr; 954 case AArch64::ANDWri: return AArch64::ANDSWri; 955 case AArch64::ANDXri: return AArch64::ANDSXri; 956 } 957 } 958 959 /// Check if AArch64::NZCV should be alive in successors of MBB. 960 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) { 961 for (auto *BB : MBB->successors()) 962 if (BB->isLiveIn(AArch64::NZCV)) 963 return true; 964 return false; 965 } 966 967 struct UsedNZCV { 968 bool N; 969 bool Z; 970 bool C; 971 bool V; 972 UsedNZCV(): N(false), Z(false), C(false), V(false) {} 973 UsedNZCV& operator |=(const UsedNZCV& UsedFlags) { 974 this->N |= UsedFlags.N; 975 this->Z |= UsedFlags.Z; 976 this->C |= UsedFlags.C; 977 this->V |= UsedFlags.V; 978 return *this; 979 } 980 }; 981 982 /// Find a condition code used by the instruction. 983 /// Returns AArch64CC::Invalid if either the instruction does not use condition 984 /// codes or we don't optimize CmpInstr in the presence of such instructions. 
985 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { 986 switch (Instr.getOpcode()) { 987 default: 988 return AArch64CC::Invalid; 989 990 case AArch64::Bcc: { 991 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 992 assert(Idx >= 2); 993 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm()); 994 } 995 996 case AArch64::CSINVWr: 997 case AArch64::CSINVXr: 998 case AArch64::CSINCWr: 999 case AArch64::CSINCXr: 1000 case AArch64::CSELWr: 1001 case AArch64::CSELXr: 1002 case AArch64::CSNEGWr: 1003 case AArch64::CSNEGXr: 1004 case AArch64::FCSELSrrr: 1005 case AArch64::FCSELDrrr: { 1006 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1007 assert(Idx >= 1); 1008 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm()); 1009 } 1010 } 1011 } 1012 1013 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { 1014 assert(CC != AArch64CC::Invalid); 1015 UsedNZCV UsedFlags; 1016 switch (CC) { 1017 default: 1018 break; 1019 1020 case AArch64CC::EQ: // Z set 1021 case AArch64CC::NE: // Z clear 1022 UsedFlags.Z = true; 1023 break; 1024 1025 case AArch64CC::HI: // Z clear and C set 1026 case AArch64CC::LS: // Z set or C clear 1027 UsedFlags.Z = true; 1028 case AArch64CC::HS: // C set 1029 case AArch64CC::LO: // C clear 1030 UsedFlags.C = true; 1031 break; 1032 1033 case AArch64CC::MI: // N set 1034 case AArch64CC::PL: // N clear 1035 UsedFlags.N = true; 1036 break; 1037 1038 case AArch64CC::VS: // V set 1039 case AArch64CC::VC: // V clear 1040 UsedFlags.V = true; 1041 break; 1042 1043 case AArch64CC::GT: // Z clear, N and V the same 1044 case AArch64CC::LE: // Z set, N and V differ 1045 UsedFlags.Z = true; 1046 case AArch64CC::GE: // N and V the same 1047 case AArch64CC::LT: // N and V differ 1048 UsedFlags.N = true; 1049 UsedFlags.V = true; 1050 break; 1051 } 1052 return UsedFlags; 1053 } 1054 1055 static bool isADDSRegImm(unsigned Opcode) { 1056 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri; 1057 } 1058 1059 static bool isSUBSRegImm(unsigned Opcode) { 1060 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; 1061 } 1062 1063 /// Check if CmpInstr can be substituted by MI. 1064 /// 1065 /// CmpInstr can be substituted: 1066 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0' 1067 /// - and, MI and CmpInstr are from the same MachineBB 1068 /// - and, condition flags are not alive in successors of the CmpInstr parent 1069 /// - and, if MI opcode is the S form there must be no defs of flags between 1070 /// MI and CmpInstr 1071 /// or if MI opcode is not the S form there must be neither defs of flags 1072 /// nor uses of flags between MI and CmpInstr. 
1073 /// - and C/V flags are not used after CmpInstr 1074 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, 1075 const TargetRegisterInfo *TRI) { 1076 assert(MI); 1077 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END); 1078 assert(CmpInstr); 1079 1080 const unsigned CmpOpcode = CmpInstr->getOpcode(); 1081 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode)) 1082 return false; 1083 1084 if (MI->getParent() != CmpInstr->getParent()) 1085 return false; 1086 1087 if (areCFlagsAliveInSuccessors(CmpInstr->getParent())) 1088 return false; 1089 1090 AccessKind AccessToCheck = AK_Write; 1091 if (sForm(*MI) != MI->getOpcode()) 1092 AccessToCheck = AK_All; 1093 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck)) 1094 return false; 1095 1096 UsedNZCV NZCVUsedAfterCmp; 1097 for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end(); 1098 I != E; ++I) { 1099 const MachineInstr &Instr = *I; 1100 if (Instr.readsRegister(AArch64::NZCV, TRI)) { 1101 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr); 1102 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction 1103 return false; 1104 NZCVUsedAfterCmp |= getUsedNZCV(CC); 1105 } 1106 1107 if (Instr.modifiesRegister(AArch64::NZCV, TRI)) 1108 break; 1109 } 1110 1111 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V; 1112 } 1113 1114 /// Substitute an instruction comparing to zero with another instruction 1115 /// which produces needed condition flags. 1116 /// 1117 /// Return true on success. 1118 bool AArch64InstrInfo::substituteCmpToZero( 1119 MachineInstr &CmpInstr, unsigned SrcReg, 1120 const MachineRegisterInfo *MRI) const { 1121 assert(MRI); 1122 // Get the unique definition of SrcReg. 1123 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 1124 if (!MI) 1125 return false; 1126 1127 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1128 1129 unsigned NewOpc = sForm(*MI); 1130 if (NewOpc == AArch64::INSTRUCTION_LIST_END) 1131 return false; 1132 1133 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI)) 1134 return false; 1135 1136 // Update the instruction to set NZCV. 
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift amount.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
1395 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const { 1396 switch (MI.getOpcode()) { 1397 default: 1398 break; 1399 case AArch64::LDRBBroW: 1400 case AArch64::LDRBroW: 1401 case AArch64::LDRDroW: 1402 case AArch64::LDRHHroW: 1403 case AArch64::LDRHroW: 1404 case AArch64::LDRQroW: 1405 case AArch64::LDRSBWroW: 1406 case AArch64::LDRSBXroW: 1407 case AArch64::LDRSHWroW: 1408 case AArch64::LDRSHXroW: 1409 case AArch64::LDRSWroW: 1410 case AArch64::LDRSroW: 1411 case AArch64::LDRWroW: 1412 case AArch64::LDRXroW: 1413 case AArch64::STRBBroW: 1414 case AArch64::STRBroW: 1415 case AArch64::STRDroW: 1416 case AArch64::STRHHroW: 1417 case AArch64::STRHroW: 1418 case AArch64::STRQroW: 1419 case AArch64::STRSroW: 1420 case AArch64::STRWroW: 1421 case AArch64::STRXroW: 1422 case AArch64::LDRBBroX: 1423 case AArch64::LDRBroX: 1424 case AArch64::LDRDroX: 1425 case AArch64::LDRHHroX: 1426 case AArch64::LDRHroX: 1427 case AArch64::LDRQroX: 1428 case AArch64::LDRSBWroX: 1429 case AArch64::LDRSBXroX: 1430 case AArch64::LDRSHWroX: 1431 case AArch64::LDRSHXroX: 1432 case AArch64::LDRSWroX: 1433 case AArch64::LDRSroX: 1434 case AArch64::LDRWroX: 1435 case AArch64::LDRXroX: 1436 case AArch64::STRBBroX: 1437 case AArch64::STRBroX: 1438 case AArch64::STRDroX: 1439 case AArch64::STRHHroX: 1440 case AArch64::STRHroX: 1441 case AArch64::STRQroX: 1442 case AArch64::STRSroX: 1443 case AArch64::STRWroX: 1444 case AArch64::STRXroX: 1445 1446 unsigned Val = MI.getOperand(3).getImm(); 1447 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val); 1448 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val); 1449 } 1450 return false; 1451 } 1452 1453 /// Check all MachineMemOperands for a hint to suppress pairing. 1454 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const { 1455 return any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1456 return MMO->getFlags() & MOSuppressPair; 1457 }); 1458 } 1459 1460 /// Set a flag on the first MachineMemOperand to suppress pairing. 1461 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const { 1462 if (MI.memoperands_empty()) 1463 return; 1464 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 1465 } 1466 1467 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const { 1468 switch (Opc) { 1469 default: 1470 return false; 1471 case AArch64::STURSi: 1472 case AArch64::STURDi: 1473 case AArch64::STURQi: 1474 case AArch64::STURBBi: 1475 case AArch64::STURHHi: 1476 case AArch64::STURWi: 1477 case AArch64::STURXi: 1478 case AArch64::LDURSi: 1479 case AArch64::LDURDi: 1480 case AArch64::LDURQi: 1481 case AArch64::LDURWi: 1482 case AArch64::LDURXi: 1483 case AArch64::LDURSWi: 1484 case AArch64::LDURHHi: 1485 case AArch64::LDURBBi: 1486 case AArch64::LDURSBWi: 1487 case AArch64::LDURSHWi: 1488 return true; 1489 } 1490 } 1491 1492 bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const { 1493 return isUnscaledLdSt(MI.getOpcode()); 1494 } 1495 1496 // Is this a candidate for ld/st merging or pairing? For example, we don't 1497 // touch volatiles or load/stores that have a hint to avoid pair formation. 1498 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const { 1499 // If this is a volatile load/store, don't mess with it. 1500 if (MI.hasOrderedMemoryRef()) 1501 return false; 1502 1503 // Make sure this is a reg+imm (as opposed to an address reloc). 
1504 assert(MI.getOperand(1).isReg() && "Expected a reg operand."); 1505 if (!MI.getOperand(2).isImm()) 1506 return false; 1507 1508 // Can't merge/pair if the instruction modifies the base register. 1509 // e.g., ldr x0, [x0] 1510 unsigned BaseReg = MI.getOperand(1).getReg(); 1511 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1512 if (MI.modifiesRegister(BaseReg, TRI)) 1513 return false; 1514 1515 // Check if this load/store has a hint to avoid pair formation. 1516 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 1517 if (isLdStPairSuppressed(MI)) 1518 return false; 1519 1520 // On some CPUs quad load/store pairs are slower than two single load/stores. 1521 if (Subtarget.avoidQuadLdStPairs()) { 1522 switch (MI.getOpcode()) { 1523 default: 1524 break; 1525 1526 case AArch64::LDURQi: 1527 case AArch64::STURQi: 1528 case AArch64::LDRQui: 1529 case AArch64::STRQui: 1530 return false; 1531 } 1532 } 1533 1534 return true; 1535 } 1536 1537 bool AArch64InstrInfo::getMemOpBaseRegImmOfs( 1538 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, 1539 const TargetRegisterInfo *TRI) const { 1540 switch (LdSt.getOpcode()) { 1541 default: 1542 return false; 1543 // Scaled instructions. 1544 case AArch64::STRSui: 1545 case AArch64::STRDui: 1546 case AArch64::STRQui: 1547 case AArch64::STRXui: 1548 case AArch64::STRWui: 1549 case AArch64::LDRSui: 1550 case AArch64::LDRDui: 1551 case AArch64::LDRQui: 1552 case AArch64::LDRXui: 1553 case AArch64::LDRWui: 1554 case AArch64::LDRSWui: 1555 // Unscaled instructions. 1556 case AArch64::STURSi: 1557 case AArch64::STURDi: 1558 case AArch64::STURQi: 1559 case AArch64::STURXi: 1560 case AArch64::STURWi: 1561 case AArch64::LDURSi: 1562 case AArch64::LDURDi: 1563 case AArch64::LDURQi: 1564 case AArch64::LDURWi: 1565 case AArch64::LDURXi: 1566 case AArch64::LDURSWi: 1567 unsigned Width; 1568 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI); 1569 }; 1570 } 1571 1572 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( 1573 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width, 1574 const TargetRegisterInfo *TRI) const { 1575 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 1576 // Handle only loads/stores with base register followed by immediate offset. 1577 if (LdSt.getNumExplicitOperands() == 3) { 1578 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 1579 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm()) 1580 return false; 1581 } else if (LdSt.getNumExplicitOperands() == 4) { 1582 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 1583 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() || 1584 !LdSt.getOperand(3).isImm()) 1585 return false; 1586 } else 1587 return false; 1588 1589 // Offset is calculated as the immediate operand multiplied by the scaling factor. 1590 // Unscaled instructions have scaling factor set to 1. 
1591 unsigned Scale = 0; 1592 switch (LdSt.getOpcode()) { 1593 default: 1594 return false; 1595 case AArch64::LDURQi: 1596 case AArch64::STURQi: 1597 Width = 16; 1598 Scale = 1; 1599 break; 1600 case AArch64::LDURXi: 1601 case AArch64::LDURDi: 1602 case AArch64::STURXi: 1603 case AArch64::STURDi: 1604 Width = 8; 1605 Scale = 1; 1606 break; 1607 case AArch64::LDURWi: 1608 case AArch64::LDURSi: 1609 case AArch64::LDURSWi: 1610 case AArch64::STURWi: 1611 case AArch64::STURSi: 1612 Width = 4; 1613 Scale = 1; 1614 break; 1615 case AArch64::LDURHi: 1616 case AArch64::LDURHHi: 1617 case AArch64::LDURSHXi: 1618 case AArch64::LDURSHWi: 1619 case AArch64::STURHi: 1620 case AArch64::STURHHi: 1621 Width = 2; 1622 Scale = 1; 1623 break; 1624 case AArch64::LDURBi: 1625 case AArch64::LDURBBi: 1626 case AArch64::LDURSBXi: 1627 case AArch64::LDURSBWi: 1628 case AArch64::STURBi: 1629 case AArch64::STURBBi: 1630 Width = 1; 1631 Scale = 1; 1632 break; 1633 case AArch64::LDPQi: 1634 case AArch64::LDNPQi: 1635 case AArch64::STPQi: 1636 case AArch64::STNPQi: 1637 Scale = 16; 1638 Width = 32; 1639 break; 1640 case AArch64::LDRQui: 1641 case AArch64::STRQui: 1642 Scale = Width = 16; 1643 break; 1644 case AArch64::LDPXi: 1645 case AArch64::LDPDi: 1646 case AArch64::LDNPXi: 1647 case AArch64::LDNPDi: 1648 case AArch64::STPXi: 1649 case AArch64::STPDi: 1650 case AArch64::STNPXi: 1651 case AArch64::STNPDi: 1652 Scale = 8; 1653 Width = 16; 1654 break; 1655 case AArch64::LDRXui: 1656 case AArch64::LDRDui: 1657 case AArch64::STRXui: 1658 case AArch64::STRDui: 1659 Scale = Width = 8; 1660 break; 1661 case AArch64::LDPWi: 1662 case AArch64::LDPSi: 1663 case AArch64::LDNPWi: 1664 case AArch64::LDNPSi: 1665 case AArch64::STPWi: 1666 case AArch64::STPSi: 1667 case AArch64::STNPWi: 1668 case AArch64::STNPSi: 1669 Scale = 4; 1670 Width = 8; 1671 break; 1672 case AArch64::LDRWui: 1673 case AArch64::LDRSui: 1674 case AArch64::LDRSWui: 1675 case AArch64::STRWui: 1676 case AArch64::STRSui: 1677 Scale = Width = 4; 1678 break; 1679 case AArch64::LDRHui: 1680 case AArch64::LDRHHui: 1681 case AArch64::STRHui: 1682 case AArch64::STRHHui: 1683 Scale = Width = 2; 1684 break; 1685 case AArch64::LDRBui: 1686 case AArch64::LDRBBui: 1687 case AArch64::STRBui: 1688 case AArch64::STRBBui: 1689 Scale = Width = 1; 1690 break; 1691 } 1692 1693 if (LdSt.getNumExplicitOperands() == 3) { 1694 BaseReg = LdSt.getOperand(1).getReg(); 1695 Offset = LdSt.getOperand(2).getImm() * Scale; 1696 } else { 1697 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 1698 BaseReg = LdSt.getOperand(2).getReg(); 1699 Offset = LdSt.getOperand(3).getImm() * Scale; 1700 } 1701 return true; 1702 } 1703 1704 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 1705 // scaled. 1706 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 1707 unsigned OffsetStride = 1; 1708 switch (Opc) { 1709 default: 1710 return false; 1711 case AArch64::LDURQi: 1712 case AArch64::STURQi: 1713 OffsetStride = 16; 1714 break; 1715 case AArch64::LDURXi: 1716 case AArch64::LDURDi: 1717 case AArch64::STURXi: 1718 case AArch64::STURDi: 1719 OffsetStride = 8; 1720 break; 1721 case AArch64::LDURWi: 1722 case AArch64::LDURSi: 1723 case AArch64::LDURSWi: 1724 case AArch64::STURWi: 1725 case AArch64::STURSi: 1726 OffsetStride = 4; 1727 break; 1728 } 1729 // If the byte-offset isn't a multiple of the stride, we can't scale this 1730 // offset. 
1731 if (Offset % OffsetStride != 0) 1732 return false; 1733 1734 // Convert the byte-offset used by unscaled into an "element" offset used 1735 // by the scaled pair load/store instructions. 1736 Offset /= OffsetStride; 1737 return true; 1738 } 1739 1740 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 1741 if (FirstOpc == SecondOpc) 1742 return true; 1743 // We can also pair sign-ext and zero-ext instructions. 1744 switch (FirstOpc) { 1745 default: 1746 return false; 1747 case AArch64::LDRWui: 1748 case AArch64::LDURWi: 1749 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 1750 case AArch64::LDRSWui: 1751 case AArch64::LDURSWi: 1752 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 1753 } 1754 // These instructions can't be paired based on their opcodes. 1755 return false; 1756 } 1757 1758 /// Detect opportunities for ldp/stp formation. 1759 /// 1760 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true. 1761 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, 1762 MachineInstr &SecondLdSt, 1763 unsigned NumLoads) const { 1764 // Only cluster up to a single pair. 1765 if (NumLoads > 1) 1766 return false; 1767 1768 // Can we pair these instructions based on their opcodes? 1769 unsigned FirstOpc = FirstLdSt.getOpcode(); 1770 unsigned SecondOpc = SecondLdSt.getOpcode(); 1771 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 1772 return false; 1773 1774 // Can't merge volatiles or load/stores that have a hint to avoid pair 1775 // formation, for example. 1776 if (!isCandidateToMergeOrPair(FirstLdSt) || 1777 !isCandidateToMergeOrPair(SecondLdSt)) 1778 return false; 1779 1780 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 1781 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 1782 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 1783 return false; 1784 1785 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 1786 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 1787 return false; 1788 1789 // Pairwise instructions have a 7-bit signed offset field. 1790 if (Offset1 > 63 || Offset1 < -64) 1791 return false; 1792 1793 // The caller should already have ordered First/SecondLdSt by offset. 1794 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 1795 return Offset1 + 1 == Offset2; 1796 } 1797 1798 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First, 1799 MachineInstr &Second) const { 1800 if (Subtarget.hasMacroOpFusion()) { 1801 // Fuse CMN, CMP, TST followed by Bcc. 1802 unsigned SecondOpcode = Second.getOpcode(); 1803 if (SecondOpcode == AArch64::Bcc) { 1804 switch (First.getOpcode()) { 1805 default: 1806 return false; 1807 case AArch64::SUBSWri: 1808 case AArch64::ADDSWri: 1809 case AArch64::ANDSWri: 1810 case AArch64::SUBSXri: 1811 case AArch64::ADDSXri: 1812 case AArch64::ANDSXri: 1813 return true; 1814 } 1815 } 1816 // Fuse ALU operations followed by CBZ/CBNZ. 
1817 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || 1818 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { 1819 switch (First.getOpcode()) { 1820 default: 1821 return false; 1822 case AArch64::ADDWri: 1823 case AArch64::ADDXri: 1824 case AArch64::ANDWri: 1825 case AArch64::ANDXri: 1826 case AArch64::EORWri: 1827 case AArch64::EORXri: 1828 case AArch64::ORRWri: 1829 case AArch64::ORRXri: 1830 case AArch64::SUBWri: 1831 case AArch64::SUBXri: 1832 return true; 1833 } 1834 } 1835 } 1836 return false; 1837 } 1838 1839 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( 1840 MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var, 1841 const MDNode *Expr, const DebugLoc &DL) const { 1842 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) 1843 .addFrameIndex(FrameIx) 1844 .addImm(0) 1845 .addImm(Offset) 1846 .addMetadata(Var) 1847 .addMetadata(Expr); 1848 return &*MIB; 1849 } 1850 1851 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 1852 unsigned Reg, unsigned SubIdx, 1853 unsigned State, 1854 const TargetRegisterInfo *TRI) { 1855 if (!SubIdx) 1856 return MIB.addReg(Reg, State); 1857 1858 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1859 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 1860 return MIB.addReg(Reg, State, SubIdx); 1861 } 1862 1863 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 1864 unsigned NumRegs) { 1865 // We really want the positive remainder mod 32 here, that happens to be 1866 // easily obtainable with a mask. 1867 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 1868 } 1869 1870 void AArch64InstrInfo::copyPhysRegTuple( 1871 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, 1872 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, 1873 llvm::ArrayRef<unsigned> Indices) const { 1874 assert(Subtarget.hasNEON() && 1875 "Unexpected register copy without NEON"); 1876 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1877 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 1878 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 1879 unsigned NumRegs = Indices.size(); 1880 1881 int SubReg = 0, End = NumRegs, Incr = 1; 1882 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 1883 SubReg = NumRegs - 1; 1884 End = -1; 1885 Incr = -1; 1886 } 1887 1888 for (; SubReg != End; SubReg += Incr) { 1889 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 1890 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 1891 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 1892 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 1893 } 1894 } 1895 1896 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 1897 MachineBasicBlock::iterator I, 1898 const DebugLoc &DL, unsigned DestReg, 1899 unsigned SrcReg, bool KillSrc) const { 1900 if (AArch64::GPR32spRegClass.contains(DestReg) && 1901 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 1902 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1903 1904 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 1905 // If either operand is WSP, expand to ADD #0. 1906 if (Subtarget.hasZeroCycleRegMove()) { 1907 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 
1908 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 1909 &AArch64::GPR64spRegClass); 1910 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 1911 &AArch64::GPR64spRegClass); 1912 // This instruction is reading and writing X registers. This may upset 1913 // the register scavenger and machine verifier, so we need to indicate 1914 // that we are reading an undefined value from SrcRegX, but a proper 1915 // value from SrcReg. 1916 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 1917 .addReg(SrcRegX, RegState::Undef) 1918 .addImm(0) 1919 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 1920 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 1921 } else { 1922 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 1923 .addReg(SrcReg, getKillRegState(KillSrc)) 1924 .addImm(0) 1925 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1926 } 1927 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { 1928 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( 1929 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1930 } else { 1931 if (Subtarget.hasZeroCycleRegMove()) { 1932 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 1933 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 1934 &AArch64::GPR64spRegClass); 1935 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 1936 &AArch64::GPR64spRegClass); 1937 // This instruction is reading and writing X registers. This may upset 1938 // the register scavenger and machine verifier, so we need to indicate 1939 // that we are reading an undefined value from SrcRegX, but a proper 1940 // value from SrcReg. 1941 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 1942 .addReg(AArch64::XZR) 1943 .addReg(SrcRegX, RegState::Undef) 1944 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 1945 } else { 1946 // Otherwise, expand to ORR WZR. 1947 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 1948 .addReg(AArch64::WZR) 1949 .addReg(SrcReg, getKillRegState(KillSrc)); 1950 } 1951 } 1952 return; 1953 } 1954 1955 if (AArch64::GPR64spRegClass.contains(DestReg) && 1956 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 1957 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 1958 // If either operand is SP, expand to ADD #0. 1959 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 1960 .addReg(SrcReg, getKillRegState(KillSrc)) 1961 .addImm(0) 1962 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1963 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { 1964 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( 1965 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1966 } else { 1967 // Otherwise, expand to ORR XZR. 1968 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 1969 .addReg(AArch64::XZR) 1970 .addReg(SrcReg, getKillRegState(KillSrc)); 1971 } 1972 return; 1973 } 1974 1975 // Copy a DDDD register quad by copying the individual sub-registers. 1976 if (AArch64::DDDDRegClass.contains(DestReg) && 1977 AArch64::DDDDRegClass.contains(SrcReg)) { 1978 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, 1979 AArch64::dsub2, AArch64::dsub3 }; 1980 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 1981 Indices); 1982 return; 1983 } 1984 1985 // Copy a DDD register triple by copying the individual sub-registers. 
1986 if (AArch64::DDDRegClass.contains(DestReg) && 1987 AArch64::DDDRegClass.contains(SrcReg)) { 1988 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, 1989 AArch64::dsub2 }; 1990 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 1991 Indices); 1992 return; 1993 } 1994 1995 // Copy a DD register pair by copying the individual sub-registers. 1996 if (AArch64::DDRegClass.contains(DestReg) && 1997 AArch64::DDRegClass.contains(SrcReg)) { 1998 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; 1999 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2000 Indices); 2001 return; 2002 } 2003 2004 // Copy a QQQQ register quad by copying the individual sub-registers. 2005 if (AArch64::QQQQRegClass.contains(DestReg) && 2006 AArch64::QQQQRegClass.contains(SrcReg)) { 2007 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, 2008 AArch64::qsub2, AArch64::qsub3 }; 2009 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2010 Indices); 2011 return; 2012 } 2013 2014 // Copy a QQQ register triple by copying the individual sub-registers. 2015 if (AArch64::QQQRegClass.contains(DestReg) && 2016 AArch64::QQQRegClass.contains(SrcReg)) { 2017 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, 2018 AArch64::qsub2 }; 2019 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2020 Indices); 2021 return; 2022 } 2023 2024 // Copy a QQ register pair by copying the individual sub-registers. 2025 if (AArch64::QQRegClass.contains(DestReg) && 2026 AArch64::QQRegClass.contains(SrcReg)) { 2027 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; 2028 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2029 Indices); 2030 return; 2031 } 2032 2033 if (AArch64::FPR128RegClass.contains(DestReg) && 2034 AArch64::FPR128RegClass.contains(SrcReg)) { 2035 if(Subtarget.hasNEON()) { 2036 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2037 .addReg(SrcReg) 2038 .addReg(SrcReg, getKillRegState(KillSrc)); 2039 } else { 2040 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2041 .addReg(AArch64::SP, RegState::Define) 2042 .addReg(SrcReg, getKillRegState(KillSrc)) 2043 .addReg(AArch64::SP) 2044 .addImm(-16); 2045 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2046 .addReg(AArch64::SP, RegState::Define) 2047 .addReg(DestReg, RegState::Define) 2048 .addReg(AArch64::SP) 2049 .addImm(16); 2050 } 2051 return; 2052 } 2053 2054 if (AArch64::FPR64RegClass.contains(DestReg) && 2055 AArch64::FPR64RegClass.contains(SrcReg)) { 2056 if(Subtarget.hasNEON()) { 2057 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2058 &AArch64::FPR128RegClass); 2059 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2060 &AArch64::FPR128RegClass); 2061 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2062 .addReg(SrcReg) 2063 .addReg(SrcReg, getKillRegState(KillSrc)); 2064 } else { 2065 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2066 .addReg(SrcReg, getKillRegState(KillSrc)); 2067 } 2068 return; 2069 } 2070 2071 if (AArch64::FPR32RegClass.contains(DestReg) && 2072 AArch64::FPR32RegClass.contains(SrcReg)) { 2073 if(Subtarget.hasNEON()) { 2074 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2075 &AArch64::FPR128RegClass); 2076 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2077 &AArch64::FPR128RegClass); 2078 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2079 .addReg(SrcReg) 2080 .addReg(SrcReg, getKillRegState(KillSrc)); 2081 } else { 2082 
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2083 .addReg(SrcReg, getKillRegState(KillSrc)); 2084 } 2085 return; 2086 } 2087 2088 if (AArch64::FPR16RegClass.contains(DestReg) && 2089 AArch64::FPR16RegClass.contains(SrcReg)) { 2090 if(Subtarget.hasNEON()) { 2091 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2092 &AArch64::FPR128RegClass); 2093 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2094 &AArch64::FPR128RegClass); 2095 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2096 .addReg(SrcReg) 2097 .addReg(SrcReg, getKillRegState(KillSrc)); 2098 } else { 2099 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2100 &AArch64::FPR32RegClass); 2101 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2102 &AArch64::FPR32RegClass); 2103 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2104 .addReg(SrcReg, getKillRegState(KillSrc)); 2105 } 2106 return; 2107 } 2108 2109 if (AArch64::FPR8RegClass.contains(DestReg) && 2110 AArch64::FPR8RegClass.contains(SrcReg)) { 2111 if(Subtarget.hasNEON()) { 2112 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2113 &AArch64::FPR128RegClass); 2114 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2115 &AArch64::FPR128RegClass); 2116 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2117 .addReg(SrcReg) 2118 .addReg(SrcReg, getKillRegState(KillSrc)); 2119 } else { 2120 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2121 &AArch64::FPR32RegClass); 2122 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2123 &AArch64::FPR32RegClass); 2124 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2125 .addReg(SrcReg, getKillRegState(KillSrc)); 2126 } 2127 return; 2128 } 2129 2130 // Copies between GPR64 and FPR64. 2131 if (AArch64::FPR64RegClass.contains(DestReg) && 2132 AArch64::GPR64RegClass.contains(SrcReg)) { 2133 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2134 .addReg(SrcReg, getKillRegState(KillSrc)); 2135 return; 2136 } 2137 if (AArch64::GPR64RegClass.contains(DestReg) && 2138 AArch64::FPR64RegClass.contains(SrcReg)) { 2139 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2140 .addReg(SrcReg, getKillRegState(KillSrc)); 2141 return; 2142 } 2143 // Copies between GPR32 and FPR32. 
2144 if (AArch64::FPR32RegClass.contains(DestReg) && 2145 AArch64::GPR32RegClass.contains(SrcReg)) { 2146 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2147 .addReg(SrcReg, getKillRegState(KillSrc)); 2148 return; 2149 } 2150 if (AArch64::GPR32RegClass.contains(DestReg) && 2151 AArch64::FPR32RegClass.contains(SrcReg)) { 2152 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2153 .addReg(SrcReg, getKillRegState(KillSrc)); 2154 return; 2155 } 2156 2157 if (DestReg == AArch64::NZCV) { 2158 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2159 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2160 .addImm(AArch64SysReg::NZCV) 2161 .addReg(SrcReg, getKillRegState(KillSrc)) 2162 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 2163 return; 2164 } 2165 2166 if (SrcReg == AArch64::NZCV) { 2167 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 2168 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 2169 .addImm(AArch64SysReg::NZCV) 2170 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 2171 return; 2172 } 2173 2174 llvm_unreachable("unimplemented reg-to-reg copy"); 2175 } 2176 2177 void AArch64InstrInfo::storeRegToStackSlot( 2178 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 2179 bool isKill, int FI, const TargetRegisterClass *RC, 2180 const TargetRegisterInfo *TRI) const { 2181 DebugLoc DL; 2182 if (MBBI != MBB.end()) 2183 DL = MBBI->getDebugLoc(); 2184 MachineFunction &MF = *MBB.getParent(); 2185 MachineFrameInfo &MFI = *MF.getFrameInfo(); 2186 unsigned Align = MFI.getObjectAlignment(FI); 2187 2188 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2189 MachineMemOperand *MMO = MF.getMachineMemOperand( 2190 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 2191 unsigned Opc = 0; 2192 bool Offset = true; 2193 switch (RC->getSize()) { 2194 case 1: 2195 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2196 Opc = AArch64::STRBui; 2197 break; 2198 case 2: 2199 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2200 Opc = AArch64::STRHui; 2201 break; 2202 case 4: 2203 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2204 Opc = AArch64::STRWui; 2205 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2206 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 2207 else 2208 assert(SrcReg != AArch64::WSP); 2209 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2210 Opc = AArch64::STRSui; 2211 break; 2212 case 8: 2213 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2214 Opc = AArch64::STRXui; 2215 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2216 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2217 else 2218 assert(SrcReg != AArch64::SP); 2219 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 2220 Opc = AArch64::STRDui; 2221 break; 2222 case 16: 2223 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2224 Opc = AArch64::STRQui; 2225 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2226 assert(Subtarget.hasNEON() && 2227 "Unexpected register store without NEON"); 2228 Opc = AArch64::ST1Twov1d; 2229 Offset = false; 2230 } 2231 break; 2232 case 24: 2233 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2234 assert(Subtarget.hasNEON() && 2235 "Unexpected register store without NEON"); 2236 Opc = AArch64::ST1Threev1d; 2237 Offset = false; 2238 } 2239 break; 2240 case 32: 2241 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2242 assert(Subtarget.hasNEON() && 2243 "Unexpected register store without NEON"); 2244 Opc = AArch64::ST1Fourv1d; 
2245 Offset = false; 2246 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2247 assert(Subtarget.hasNEON() && 2248 "Unexpected register store without NEON"); 2249 Opc = AArch64::ST1Twov2d; 2250 Offset = false; 2251 } 2252 break; 2253 case 48: 2254 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2255 assert(Subtarget.hasNEON() && 2256 "Unexpected register store without NEON"); 2257 Opc = AArch64::ST1Threev2d; 2258 Offset = false; 2259 } 2260 break; 2261 case 64: 2262 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2263 assert(Subtarget.hasNEON() && 2264 "Unexpected register store without NEON"); 2265 Opc = AArch64::ST1Fourv2d; 2266 Offset = false; 2267 } 2268 break; 2269 } 2270 assert(Opc && "Unknown register class"); 2271 2272 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) 2273 .addReg(SrcReg, getKillRegState(isKill)) 2274 .addFrameIndex(FI); 2275 2276 if (Offset) 2277 MI.addImm(0); 2278 MI.addMemOperand(MMO); 2279 } 2280 2281 void AArch64InstrInfo::loadRegFromStackSlot( 2282 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 2283 int FI, const TargetRegisterClass *RC, 2284 const TargetRegisterInfo *TRI) const { 2285 DebugLoc DL; 2286 if (MBBI != MBB.end()) 2287 DL = MBBI->getDebugLoc(); 2288 MachineFunction &MF = *MBB.getParent(); 2289 MachineFrameInfo &MFI = *MF.getFrameInfo(); 2290 unsigned Align = MFI.getObjectAlignment(FI); 2291 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2292 MachineMemOperand *MMO = MF.getMachineMemOperand( 2293 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 2294 2295 unsigned Opc = 0; 2296 bool Offset = true; 2297 switch (RC->getSize()) { 2298 case 1: 2299 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2300 Opc = AArch64::LDRBui; 2301 break; 2302 case 2: 2303 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2304 Opc = AArch64::LDRHui; 2305 break; 2306 case 4: 2307 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2308 Opc = AArch64::LDRWui; 2309 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2310 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 2311 else 2312 assert(DestReg != AArch64::WSP); 2313 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2314 Opc = AArch64::LDRSui; 2315 break; 2316 case 8: 2317 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2318 Opc = AArch64::LDRXui; 2319 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2320 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 2321 else 2322 assert(DestReg != AArch64::SP); 2323 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 2324 Opc = AArch64::LDRDui; 2325 break; 2326 case 16: 2327 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2328 Opc = AArch64::LDRQui; 2329 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2330 assert(Subtarget.hasNEON() && 2331 "Unexpected register load without NEON"); 2332 Opc = AArch64::LD1Twov1d; 2333 Offset = false; 2334 } 2335 break; 2336 case 24: 2337 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2338 assert(Subtarget.hasNEON() && 2339 "Unexpected register load without NEON"); 2340 Opc = AArch64::LD1Threev1d; 2341 Offset = false; 2342 } 2343 break; 2344 case 32: 2345 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2346 assert(Subtarget.hasNEON() && 2347 "Unexpected register load without NEON"); 2348 Opc = AArch64::LD1Fourv1d; 2349 Offset = false; 2350 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2351 assert(Subtarget.hasNEON() && 2352 "Unexpected register load without NEON"); 2353 Opc = AArch64::LD1Twov2d; 2354 Offset = false; 2355 } 
2356 break; 2357 case 48: 2358 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2359 assert(Subtarget.hasNEON() && 2360 "Unexpected register load without NEON"); 2361 Opc = AArch64::LD1Threev2d; 2362 Offset = false; 2363 } 2364 break; 2365 case 64: 2366 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2367 assert(Subtarget.hasNEON() && 2368 "Unexpected register load without NEON"); 2369 Opc = AArch64::LD1Fourv2d; 2370 Offset = false; 2371 } 2372 break; 2373 } 2374 assert(Opc && "Unknown register class"); 2375 2376 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) 2377 .addReg(DestReg, getDefRegState(true)) 2378 .addFrameIndex(FI); 2379 if (Offset) 2380 MI.addImm(0); 2381 MI.addMemOperand(MMO); 2382 } 2383 2384 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 2385 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 2386 unsigned DestReg, unsigned SrcReg, int Offset, 2387 const TargetInstrInfo *TII, 2388 MachineInstr::MIFlag Flag, bool SetNZCV) { 2389 if (DestReg == SrcReg && Offset == 0) 2390 return; 2391 2392 assert((DestReg != AArch64::SP || Offset % 16 == 0) && 2393 "SP increment/decrement not 16-byte aligned"); 2394 2395 bool isSub = Offset < 0; 2396 if (isSub) 2397 Offset = -Offset; 2398 2399 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 2400 // scratch register. If DestReg is a virtual register, use it as the 2401 // scratch register; otherwise, create a new virtual register (to be 2402 // replaced by the scavenger at the end of PEI). That case can be optimized 2403 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 2404 // register can be loaded with offset%8 and the add/sub can use an extending 2405 // instruction with LSL#3. 2406 // Currently the function handles any offsets but generates a poor sequence 2407 // of code. 2408 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 2409 2410 unsigned Opc; 2411 if (SetNZCV) 2412 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; 2413 else 2414 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri; 2415 const unsigned MaxEncoding = 0xfff; 2416 const unsigned ShiftSize = 12; 2417 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 2418 while (((unsigned)Offset) >= (1 << ShiftSize)) { 2419 unsigned ThisVal; 2420 if (((unsigned)Offset) > MaxEncodableValue) { 2421 ThisVal = MaxEncodableValue; 2422 } else { 2423 ThisVal = Offset & MaxEncodableValue; 2424 } 2425 assert((ThisVal >> ShiftSize) <= MaxEncoding && 2426 "Encoding cannot handle value that big"); 2427 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 2428 .addReg(SrcReg) 2429 .addImm(ThisVal >> ShiftSize) 2430 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) 2431 .setMIFlag(Flag); 2432 2433 SrcReg = DestReg; 2434 Offset -= ThisVal; 2435 if (Offset == 0) 2436 return; 2437 } 2438 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 2439 .addReg(SrcReg) 2440 .addImm(Offset) 2441 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2442 .setMIFlag(Flag); 2443 } 2444 2445 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 2446 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 2447 MachineBasicBlock::iterator InsertPt, int FrameIndex, 2448 LiveIntervals *LIS) const { 2449 // This is a bit of a hack. Consider this instruction: 2450 // 2451 // %vreg0<def> = COPY %SP; GPR64all:%vreg0 2452 // 2453 // We explicitly chose GPR64all for the virtual register so such a copy might 2454 // be eliminated by RegisterCoalescer. However, that may not be possible, and 2455 // %vreg0 may even spill. 
We can't spill %SP, and since it is in the GPR64all 2456 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 2457 // 2458 // To prevent that, we are going to constrain the %vreg0 register class here. 2459 // 2460 // <rdar://problem/11522048> 2461 // 2462 if (MI.isCopy()) { 2463 unsigned DstReg = MI.getOperand(0).getReg(); 2464 unsigned SrcReg = MI.getOperand(1).getReg(); 2465 if (SrcReg == AArch64::SP && 2466 TargetRegisterInfo::isVirtualRegister(DstReg)) { 2467 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 2468 return nullptr; 2469 } 2470 if (DstReg == AArch64::SP && 2471 TargetRegisterInfo::isVirtualRegister(SrcReg)) { 2472 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2473 return nullptr; 2474 } 2475 } 2476 2477 // Cannot fold. 2478 return nullptr; 2479 } 2480 2481 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, 2482 bool *OutUseUnscaledOp, 2483 unsigned *OutUnscaledOp, 2484 int *EmittableOffset) { 2485 int Scale = 1; 2486 bool IsSigned = false; 2487 // The ImmIdx should be changed case by case if it is not 2. 2488 unsigned ImmIdx = 2; 2489 unsigned UnscaledOp = 0; 2490 // Set output values in case of early exit. 2491 if (EmittableOffset) 2492 *EmittableOffset = 0; 2493 if (OutUseUnscaledOp) 2494 *OutUseUnscaledOp = false; 2495 if (OutUnscaledOp) 2496 *OutUnscaledOp = 0; 2497 switch (MI.getOpcode()) { 2498 default: 2499 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); 2500 // Vector spills/fills can't take an immediate offset. 2501 case AArch64::LD1Twov2d: 2502 case AArch64::LD1Threev2d: 2503 case AArch64::LD1Fourv2d: 2504 case AArch64::LD1Twov1d: 2505 case AArch64::LD1Threev1d: 2506 case AArch64::LD1Fourv1d: 2507 case AArch64::ST1Twov2d: 2508 case AArch64::ST1Threev2d: 2509 case AArch64::ST1Fourv2d: 2510 case AArch64::ST1Twov1d: 2511 case AArch64::ST1Threev1d: 2512 case AArch64::ST1Fourv1d: 2513 return AArch64FrameOffsetCannotUpdate; 2514 case AArch64::PRFMui: 2515 Scale = 8; 2516 UnscaledOp = AArch64::PRFUMi; 2517 break; 2518 case AArch64::LDRXui: 2519 Scale = 8; 2520 UnscaledOp = AArch64::LDURXi; 2521 break; 2522 case AArch64::LDRWui: 2523 Scale = 4; 2524 UnscaledOp = AArch64::LDURWi; 2525 break; 2526 case AArch64::LDRBui: 2527 Scale = 1; 2528 UnscaledOp = AArch64::LDURBi; 2529 break; 2530 case AArch64::LDRHui: 2531 Scale = 2; 2532 UnscaledOp = AArch64::LDURHi; 2533 break; 2534 case AArch64::LDRSui: 2535 Scale = 4; 2536 UnscaledOp = AArch64::LDURSi; 2537 break; 2538 case AArch64::LDRDui: 2539 Scale = 8; 2540 UnscaledOp = AArch64::LDURDi; 2541 break; 2542 case AArch64::LDRQui: 2543 Scale = 16; 2544 UnscaledOp = AArch64::LDURQi; 2545 break; 2546 case AArch64::LDRBBui: 2547 Scale = 1; 2548 UnscaledOp = AArch64::LDURBBi; 2549 break; 2550 case AArch64::LDRHHui: 2551 Scale = 2; 2552 UnscaledOp = AArch64::LDURHHi; 2553 break; 2554 case AArch64::LDRSBXui: 2555 Scale = 1; 2556 UnscaledOp = AArch64::LDURSBXi; 2557 break; 2558 case AArch64::LDRSBWui: 2559 Scale = 1; 2560 UnscaledOp = AArch64::LDURSBWi; 2561 break; 2562 case AArch64::LDRSHXui: 2563 Scale = 2; 2564 UnscaledOp = AArch64::LDURSHXi; 2565 break; 2566 case AArch64::LDRSHWui: 2567 Scale = 2; 2568 UnscaledOp = AArch64::LDURSHWi; 2569 break; 2570 case AArch64::LDRSWui: 2571 Scale = 4; 2572 UnscaledOp = AArch64::LDURSWi; 2573 break; 2574 2575 case AArch64::STRXui: 2576 Scale = 8; 2577 UnscaledOp = AArch64::STURXi; 2578 break; 2579 case AArch64::STRWui: 2580 Scale = 4; 2581 UnscaledOp = AArch64::STURWi; 2582 break; 2583 case 
AArch64::STRBui: 2584 Scale = 1; 2585 UnscaledOp = AArch64::STURBi; 2586 break; 2587 case AArch64::STRHui: 2588 Scale = 2; 2589 UnscaledOp = AArch64::STURHi; 2590 break; 2591 case AArch64::STRSui: 2592 Scale = 4; 2593 UnscaledOp = AArch64::STURSi; 2594 break; 2595 case AArch64::STRDui: 2596 Scale = 8; 2597 UnscaledOp = AArch64::STURDi; 2598 break; 2599 case AArch64::STRQui: 2600 Scale = 16; 2601 UnscaledOp = AArch64::STURQi; 2602 break; 2603 case AArch64::STRBBui: 2604 Scale = 1; 2605 UnscaledOp = AArch64::STURBBi; 2606 break; 2607 case AArch64::STRHHui: 2608 Scale = 2; 2609 UnscaledOp = AArch64::STURHHi; 2610 break; 2611 2612 case AArch64::LDPXi: 2613 case AArch64::LDPDi: 2614 case AArch64::STPXi: 2615 case AArch64::STPDi: 2616 case AArch64::LDNPXi: 2617 case AArch64::LDNPDi: 2618 case AArch64::STNPXi: 2619 case AArch64::STNPDi: 2620 ImmIdx = 3; 2621 IsSigned = true; 2622 Scale = 8; 2623 break; 2624 case AArch64::LDPQi: 2625 case AArch64::STPQi: 2626 case AArch64::LDNPQi: 2627 case AArch64::STNPQi: 2628 ImmIdx = 3; 2629 IsSigned = true; 2630 Scale = 16; 2631 break; 2632 case AArch64::LDPWi: 2633 case AArch64::LDPSi: 2634 case AArch64::STPWi: 2635 case AArch64::STPSi: 2636 case AArch64::LDNPWi: 2637 case AArch64::LDNPSi: 2638 case AArch64::STNPWi: 2639 case AArch64::STNPSi: 2640 ImmIdx = 3; 2641 IsSigned = true; 2642 Scale = 4; 2643 break; 2644 2645 case AArch64::LDURXi: 2646 case AArch64::LDURWi: 2647 case AArch64::LDURBi: 2648 case AArch64::LDURHi: 2649 case AArch64::LDURSi: 2650 case AArch64::LDURDi: 2651 case AArch64::LDURQi: 2652 case AArch64::LDURHHi: 2653 case AArch64::LDURBBi: 2654 case AArch64::LDURSBXi: 2655 case AArch64::LDURSBWi: 2656 case AArch64::LDURSHXi: 2657 case AArch64::LDURSHWi: 2658 case AArch64::LDURSWi: 2659 case AArch64::STURXi: 2660 case AArch64::STURWi: 2661 case AArch64::STURBi: 2662 case AArch64::STURHi: 2663 case AArch64::STURSi: 2664 case AArch64::STURDi: 2665 case AArch64::STURQi: 2666 case AArch64::STURBBi: 2667 case AArch64::STURHHi: 2668 Scale = 1; 2669 break; 2670 } 2671 2672 Offset += MI.getOperand(ImmIdx).getImm() * Scale; 2673 2674 bool useUnscaledOp = false; 2675 // If the offset doesn't match the scale, we rewrite the instruction to 2676 // use the unscaled instruction instead. Likewise, if we have a negative 2677 // offset (and have an unscaled op to use). 2678 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) 2679 useUnscaledOp = true; 2680 2681 // Use an unscaled addressing mode if the instruction has a negative offset 2682 // (or if the instruction is already using an unscaled addressing mode). 2683 unsigned MaskBits; 2684 if (IsSigned) { 2685 // ldp/stp instructions. 2686 MaskBits = 7; 2687 Offset /= Scale; 2688 } else if (UnscaledOp == 0 || useUnscaledOp) { 2689 MaskBits = 9; 2690 IsSigned = true; 2691 Scale = 1; 2692 } else { 2693 MaskBits = 12; 2694 IsSigned = false; 2695 Offset /= Scale; 2696 } 2697 2698 // Attempt to fold address computation. 2699 int MaxOff = (1 << (MaskBits - IsSigned)) - 1; 2700 int MinOff = (IsSigned ? (-MaxOff - 1) : 0); 2701 if (Offset >= MinOff && Offset <= MaxOff) { 2702 if (EmittableOffset) 2703 *EmittableOffset = Offset; 2704 Offset = 0; 2705 } else { 2706 int NewOff = Offset < 0 ? MinOff : MaxOff; 2707 if (EmittableOffset) 2708 *EmittableOffset = NewOff; 2709 Offset = (Offset - NewOff) * Scale; 2710 } 2711 if (OutUseUnscaledOp) 2712 *OutUseUnscaledOp = useUnscaledOp; 2713 if (OutUnscaledOp) 2714 *OutUnscaledOp = UnscaledOp; 2715 return AArch64FrameOffsetCanUpdate | 2716 (Offset == 0 ? 
AArch64FrameOffsetIsLegal : 0); 2717 } 2718 2719 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 2720 unsigned FrameReg, int &Offset, 2721 const AArch64InstrInfo *TII) { 2722 unsigned Opcode = MI.getOpcode(); 2723 unsigned ImmIdx = FrameRegIdx + 1; 2724 2725 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 2726 Offset += MI.getOperand(ImmIdx).getImm(); 2727 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 2728 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 2729 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 2730 MI.eraseFromParent(); 2731 Offset = 0; 2732 return true; 2733 } 2734 2735 int NewOffset; 2736 unsigned UnscaledOp; 2737 bool UseUnscaledOp; 2738 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 2739 &UnscaledOp, &NewOffset); 2740 if (Status & AArch64FrameOffsetCanUpdate) { 2741 if (Status & AArch64FrameOffsetIsLegal) 2742 // Replace the FrameIndex with FrameReg. 2743 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2744 if (UseUnscaledOp) 2745 MI.setDesc(TII->get(UnscaledOp)); 2746 2747 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 2748 return Offset == 0; 2749 } 2750 2751 return false; 2752 } 2753 2754 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { 2755 NopInst.setOpcode(AArch64::HINT); 2756 NopInst.addOperand(MCOperand::createImm(0)); 2757 } 2758 2759 // AArch64 supports MachineCombiner. 2760 bool AArch64InstrInfo::useMachineCombiner() const { 2761 2762 return true; 2763 } 2764 // 2765 // True when Opc sets flag 2766 static bool isCombineInstrSettingFlag(unsigned Opc) { 2767 switch (Opc) { 2768 case AArch64::ADDSWrr: 2769 case AArch64::ADDSWri: 2770 case AArch64::ADDSXrr: 2771 case AArch64::ADDSXri: 2772 case AArch64::SUBSWrr: 2773 case AArch64::SUBSXrr: 2774 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 2775 case AArch64::SUBSWri: 2776 case AArch64::SUBSXri: 2777 return true; 2778 default: 2779 break; 2780 } 2781 return false; 2782 } 2783 // 2784 // 32b Opcodes that can be combined with a MUL 2785 static bool isCombineInstrCandidate32(unsigned Opc) { 2786 switch (Opc) { 2787 case AArch64::ADDWrr: 2788 case AArch64::ADDWri: 2789 case AArch64::SUBWrr: 2790 case AArch64::ADDSWrr: 2791 case AArch64::ADDSWri: 2792 case AArch64::SUBSWrr: 2793 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 2794 case AArch64::SUBWri: 2795 case AArch64::SUBSWri: 2796 return true; 2797 default: 2798 break; 2799 } 2800 return false; 2801 } 2802 // 2803 // 64b Opcodes that can be combined with a MUL 2804 static bool isCombineInstrCandidate64(unsigned Opc) { 2805 switch (Opc) { 2806 case AArch64::ADDXrr: 2807 case AArch64::ADDXri: 2808 case AArch64::SUBXrr: 2809 case AArch64::ADDSXrr: 2810 case AArch64::ADDSXri: 2811 case AArch64::SUBSXrr: 2812 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
2813   case AArch64::SUBXri:
2814   case AArch64::SUBSXri:
2815     return true;
2816   default:
2817     break;
2818   }
2819   return false;
2820 }
2821 //
2822 // FP Opcodes that can be combined with a FMUL
2823 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
2824   switch (Inst.getOpcode()) {
2825   case AArch64::FADDSrr:
2826   case AArch64::FADDDrr:
2827   case AArch64::FADDv2f32:
2828   case AArch64::FADDv2f64:
2829   case AArch64::FADDv4f32:
2830   case AArch64::FSUBSrr:
2831   case AArch64::FSUBDrr:
2832   case AArch64::FSUBv2f32:
2833   case AArch64::FSUBv2f64:
2834   case AArch64::FSUBv4f32:
2835     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2836   default:
2837     break;
2838   }
2839   return false;
2840 }
2841 //
2842 // Opcodes that can be combined with a MUL
2843 static bool isCombineInstrCandidate(unsigned Opc) {
2844   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
2845 }
2846
2847 //
2848 // Utility routine that checks if \param MO is defined by a
2849 // \param CombineOpc instruction in the basic block \param MBB
2850 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
2851                        unsigned CombineOpc, unsigned ZeroReg = 0,
2852                        bool CheckZeroReg = false) {
2853   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2854   MachineInstr *MI = nullptr;
2855
2856   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2857     MI = MRI.getUniqueVRegDef(MO.getReg());
2858   // And it needs to be in the trace (otherwise, it won't have a depth).
2859   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
2860     return false;
2861   // It must only be used by the instruction we are combining with.
2862   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2863     return false;
2864
2865   if (CheckZeroReg) {
2866     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
2867            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
2868            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
2869     // The third input reg must be zero.
2870     if (MI->getOperand(3).getReg() != ZeroReg)
2871       return false;
2872   }
2873
2874   return true;
2875 }
2876
2877 //
2878 // Is \param MO defined by an integer multiply and can it be combined?
2879 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2880                               unsigned MulOpc, unsigned ZeroReg) {
2881   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
2882 }
2883
2884 //
2885 // Is \param MO defined by a floating-point multiply and can it be combined?
2886 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2887                               unsigned MulOpc) {
2888   return canCombine(MBB, MO, MulOpc);
2889 }
2890
2891 // TODO: There are many more machine instruction opcodes to match:
2892 // 1. Other data types (integer, vectors)
2893 // 2. Other math / logic operations (xor, or)
2894 // 3.
Other forms of the same operation (intrinsics and other variants) 2895 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { 2896 switch (Inst.getOpcode()) { 2897 case AArch64::FADDDrr: 2898 case AArch64::FADDSrr: 2899 case AArch64::FADDv2f32: 2900 case AArch64::FADDv2f64: 2901 case AArch64::FADDv4f32: 2902 case AArch64::FMULDrr: 2903 case AArch64::FMULSrr: 2904 case AArch64::FMULX32: 2905 case AArch64::FMULX64: 2906 case AArch64::FMULXv2f32: 2907 case AArch64::FMULXv2f64: 2908 case AArch64::FMULXv4f32: 2909 case AArch64::FMULv2f32: 2910 case AArch64::FMULv2f64: 2911 case AArch64::FMULv4f32: 2912 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 2913 default: 2914 return false; 2915 } 2916 } 2917 2918 /// Find instructions that can be turned into madd. 2919 static bool getMaddPatterns(MachineInstr &Root, 2920 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 2921 unsigned Opc = Root.getOpcode(); 2922 MachineBasicBlock &MBB = *Root.getParent(); 2923 bool Found = false; 2924 2925 if (!isCombineInstrCandidate(Opc)) 2926 return false; 2927 if (isCombineInstrSettingFlag(Opc)) { 2928 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 2929 // When NZCV is live bail out. 2930 if (Cmp_NZCV == -1) 2931 return false; 2932 unsigned NewOpc = convertFlagSettingOpcode(Root); 2933 // When opcode can't change bail out. 2934 // CHECKME: do we miss any cases for opcode conversion? 2935 if (NewOpc == Opc) 2936 return false; 2937 Opc = NewOpc; 2938 } 2939 2940 switch (Opc) { 2941 default: 2942 break; 2943 case AArch64::ADDWrr: 2944 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 2945 "ADDWrr does not have register operands"); 2946 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 2947 AArch64::WZR)) { 2948 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); 2949 Found = true; 2950 } 2951 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 2952 AArch64::WZR)) { 2953 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); 2954 Found = true; 2955 } 2956 break; 2957 case AArch64::ADDXrr: 2958 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 2959 AArch64::XZR)) { 2960 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); 2961 Found = true; 2962 } 2963 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 2964 AArch64::XZR)) { 2965 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); 2966 Found = true; 2967 } 2968 break; 2969 case AArch64::SUBWrr: 2970 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 2971 AArch64::WZR)) { 2972 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); 2973 Found = true; 2974 } 2975 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 2976 AArch64::WZR)) { 2977 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); 2978 Found = true; 2979 } 2980 break; 2981 case AArch64::SUBXrr: 2982 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 2983 AArch64::XZR)) { 2984 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); 2985 Found = true; 2986 } 2987 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 2988 AArch64::XZR)) { 2989 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); 2990 Found = true; 2991 } 2992 break; 2993 case AArch64::ADDWri: 2994 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 2995 AArch64::WZR)) { 2996 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); 2997 Found = true; 2998 } 2999 break; 3000 case AArch64::ADDXri: 3001 
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3002 AArch64::XZR)) { 3003 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); 3004 Found = true; 3005 } 3006 break; 3007 case AArch64::SUBWri: 3008 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3009 AArch64::WZR)) { 3010 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); 3011 Found = true; 3012 } 3013 break; 3014 case AArch64::SUBXri: 3015 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3016 AArch64::XZR)) { 3017 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); 3018 Found = true; 3019 } 3020 break; 3021 } 3022 return Found; 3023 } 3024 /// Floating-Point Support 3025 3026 /// Find instructions that can be turned into madd. 3027 static bool getFMAPatterns(MachineInstr &Root, 3028 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3029 3030 if (!isCombineInstrCandidateFP(Root)) 3031 return 0; 3032 3033 MachineBasicBlock &MBB = *Root.getParent(); 3034 bool Found = false; 3035 3036 switch (Root.getOpcode()) { 3037 default: 3038 assert(false && "Unsupported FP instruction in combiner\n"); 3039 break; 3040 case AArch64::FADDSrr: 3041 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3042 "FADDWrr does not have register operands"); 3043 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3044 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); 3045 Found = true; 3046 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3047 AArch64::FMULv1i32_indexed)) { 3048 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); 3049 Found = true; 3050 } 3051 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3052 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); 3053 Found = true; 3054 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3055 AArch64::FMULv1i32_indexed)) { 3056 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); 3057 Found = true; 3058 } 3059 break; 3060 case AArch64::FADDDrr: 3061 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3062 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); 3063 Found = true; 3064 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3065 AArch64::FMULv1i64_indexed)) { 3066 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); 3067 Found = true; 3068 } 3069 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3070 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); 3071 Found = true; 3072 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3073 AArch64::FMULv1i64_indexed)) { 3074 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); 3075 Found = true; 3076 } 3077 break; 3078 case AArch64::FADDv2f32: 3079 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3080 AArch64::FMULv2i32_indexed)) { 3081 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); 3082 Found = true; 3083 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3084 AArch64::FMULv2f32)) { 3085 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); 3086 Found = true; 3087 } 3088 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3089 AArch64::FMULv2i32_indexed)) { 3090 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); 3091 Found = true; 3092 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3093 AArch64::FMULv2f32)) { 3094 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); 3095 Found = true; 3096 } 3097 break; 3098 case AArch64::FADDv2f64: 3099 if 
(canCombineWithFMUL(MBB, Root.getOperand(1), 3100 AArch64::FMULv2i64_indexed)) { 3101 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); 3102 Found = true; 3103 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3104 AArch64::FMULv2f64)) { 3105 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); 3106 Found = true; 3107 } 3108 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3109 AArch64::FMULv2i64_indexed)) { 3110 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); 3111 Found = true; 3112 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3113 AArch64::FMULv2f64)) { 3114 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); 3115 Found = true; 3116 } 3117 break; 3118 case AArch64::FADDv4f32: 3119 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3120 AArch64::FMULv4i32_indexed)) { 3121 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); 3122 Found = true; 3123 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3124 AArch64::FMULv4f32)) { 3125 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); 3126 Found = true; 3127 } 3128 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3129 AArch64::FMULv4i32_indexed)) { 3130 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); 3131 Found = true; 3132 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3133 AArch64::FMULv4f32)) { 3134 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); 3135 Found = true; 3136 } 3137 break; 3138 3139 case AArch64::FSUBSrr: 3140 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3141 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); 3142 Found = true; 3143 } 3144 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3145 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); 3146 Found = true; 3147 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3148 AArch64::FMULv1i32_indexed)) { 3149 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); 3150 Found = true; 3151 } 3152 break; 3153 case AArch64::FSUBDrr: 3154 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3155 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); 3156 Found = true; 3157 } 3158 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3159 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); 3160 Found = true; 3161 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3162 AArch64::FMULv1i64_indexed)) { 3163 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); 3164 Found = true; 3165 } 3166 break; 3167 case AArch64::FSUBv2f32: 3168 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3169 AArch64::FMULv2i32_indexed)) { 3170 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); 3171 Found = true; 3172 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3173 AArch64::FMULv2f32)) { 3174 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); 3175 Found = true; 3176 } 3177 break; 3178 case AArch64::FSUBv2f64: 3179 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3180 AArch64::FMULv2i64_indexed)) { 3181 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); 3182 Found = true; 3183 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3184 AArch64::FMULv2f64)) { 3185 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); 3186 Found = true; 3187 } 3188 break; 3189 case AArch64::FSUBv4f32: 3190 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3191 AArch64::FMULv4i32_indexed)) { 3192 
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); 3193 Found = true; 3194 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3195 AArch64::FMULv4f32)) { 3196 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); 3197 Found = true; 3198 } 3199 break; 3200 } 3201 return Found; 3202 } 3203 3204 /// Return true when a code sequence can improve throughput. It 3205 /// should be called only for instructions in loops. 3206 /// \param Pattern - combiner pattern 3207 bool 3208 AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { 3209 switch (Pattern) { 3210 default: 3211 break; 3212 case MachineCombinerPattern::FMULADDS_OP1: 3213 case MachineCombinerPattern::FMULADDS_OP2: 3214 case MachineCombinerPattern::FMULSUBS_OP1: 3215 case MachineCombinerPattern::FMULSUBS_OP2: 3216 case MachineCombinerPattern::FMULADDD_OP1: 3217 case MachineCombinerPattern::FMULADDD_OP2: 3218 case MachineCombinerPattern::FMULSUBD_OP1: 3219 case MachineCombinerPattern::FMULSUBD_OP2: 3220 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 3221 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 3222 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 3223 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 3224 case MachineCombinerPattern::FMLAv2f32_OP2: 3225 case MachineCombinerPattern::FMLAv2f32_OP1: 3226 case MachineCombinerPattern::FMLAv2f64_OP1: 3227 case MachineCombinerPattern::FMLAv2f64_OP2: 3228 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 3229 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 3230 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 3231 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 3232 case MachineCombinerPattern::FMLAv4f32_OP1: 3233 case MachineCombinerPattern::FMLAv4f32_OP2: 3234 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 3235 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 3236 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 3237 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 3238 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 3239 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 3240 case MachineCombinerPattern::FMLSv2f32_OP2: 3241 case MachineCombinerPattern::FMLSv2f64_OP2: 3242 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 3243 case MachineCombinerPattern::FMLSv4f32_OP2: 3244 return true; 3245 } // end switch (Pattern) 3246 return false; 3247 } 3248 /// Return true when there is potentially a faster code sequence for an 3249 /// instruction chain ending in \p Root. All potential patterns are listed in 3250 /// the \p Pattern vector. Pattern should be sorted in priority order since the 3251 /// pattern evaluator stops checking as soon as it finds a faster sequence. 3252 3253 bool AArch64InstrInfo::getMachineCombinerPatterns( 3254 MachineInstr &Root, 3255 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 3256 // Integer patterns 3257 if (getMaddPatterns(Root, Patterns)) 3258 return true; 3259 // Floating point patterns 3260 if (getFMAPatterns(Root, Patterns)) 3261 return true; 3262 3263 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 3264 } 3265 3266 enum class FMAInstKind { Default, Indexed, Accumulator }; 3267 /// genFusedMultiply - Generate fused multiply instructions. 3268 /// This function supports both integer and floating point instructions. 
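/// The FMAInstKind parameter below selects between the default, indexed and
/// accumulator operand orderings when building the fused instruction.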
3269 /// A typical example:
3270 ///  F|MUL I=A,B,0
3271 ///  F|ADD R,I,C
3272 ///  ==> F|MADD R,A,B,C
3273 /// \param Root is the F|ADD instruction
3274 /// \param [out] InsInstrs is a vector of machine instructions and will
3275 /// contain the generated madd instruction
3276 /// \param IdxMulOpd is the index of the operand in Root that is the result
3277 /// of the F|MUL. In the example above IdxMulOpd is 1.
3278 /// \param MaddOpc the opcode of the f|madd instruction
3279 static MachineInstr *
3280 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3281                  const TargetInstrInfo *TII, MachineInstr &Root,
3282                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3283                  unsigned MaddOpc, const TargetRegisterClass *RC,
3284                  FMAInstKind kind = FMAInstKind::Default) {
3285   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3286
3287   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3288   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3289   unsigned ResultReg = Root.getOperand(0).getReg();
3290   unsigned SrcReg0 = MUL->getOperand(1).getReg();
3291   bool Src0IsKill = MUL->getOperand(1).isKill();
3292   unsigned SrcReg1 = MUL->getOperand(2).getReg();
3293   bool Src1IsKill = MUL->getOperand(2).isKill();
3294   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3295   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3296
3297   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3298     MRI.constrainRegClass(ResultReg, RC);
3299   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3300     MRI.constrainRegClass(SrcReg0, RC);
3301   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3302     MRI.constrainRegClass(SrcReg1, RC);
3303   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3304     MRI.constrainRegClass(SrcReg2, RC);
3305
3306   MachineInstrBuilder MIB;
3307   if (kind == FMAInstKind::Default)
3308     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3309               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3310               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3311               .addReg(SrcReg2, getKillRegState(Src2IsKill));
3312   else if (kind == FMAInstKind::Indexed)
3313     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3314               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3315               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3316               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3317               .addImm(MUL->getOperand(3).getImm());
3318   else if (kind == FMAInstKind::Accumulator)
3319     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3320               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3321               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3322               .addReg(SrcReg1, getKillRegState(Src1IsKill));
3323   else
3324     assert(false && "Invalid FMA instruction kind \n");
3325   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3326   InsInstrs.push_back(MIB);
3327   return MUL;
3328 }
3329
3330 /// genMaddR - Generate madd instruction and combine mul and add using
3331 /// an extra virtual register
3332 /// Example - an ADD intermediate needs to be stored in a register:
3333 ///   MUL I=A,B,0
3334 ///   ADD R,I,Imm
3335 ///   ==> ORR V, ZR, Imm
3336 ///   ==> MADD R,A,B,V
3337 /// \param Root is the ADD instruction
3338 /// \param [out] InsInstrs is a vector of machine instructions and will
3339 /// contain the generated madd instruction
3340 /// \param IdxMulOpd is the index of the operand in Root that is the result
3341 /// of the MUL. In the example above IdxMulOpd is 1.
3342 /// \param MaddOpc the opcode of the madd instruction
3343 /// \param VR is a virtual register that holds the value of an ADD operand
3344 /// (V in the example above).
3345 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3346                               const TargetInstrInfo *TII, MachineInstr &Root,
3347                               SmallVectorImpl<MachineInstr *> &InsInstrs,
3348                               unsigned IdxMulOpd, unsigned MaddOpc,
3349                               unsigned VR, const TargetRegisterClass *RC) {
3350   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3351
3352   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3353   unsigned ResultReg = Root.getOperand(0).getReg();
3354   unsigned SrcReg0 = MUL->getOperand(1).getReg();
3355   bool Src0IsKill = MUL->getOperand(1).isKill();
3356   unsigned SrcReg1 = MUL->getOperand(2).getReg();
3357   bool Src1IsKill = MUL->getOperand(2).isKill();
3358
3359   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3360     MRI.constrainRegClass(ResultReg, RC);
3361   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3362     MRI.constrainRegClass(SrcReg0, RC);
3363   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3364     MRI.constrainRegClass(SrcReg1, RC);
3365   if (TargetRegisterInfo::isVirtualRegister(VR))
3366     MRI.constrainRegClass(VR, RC);
3367
3368   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
3369                                     ResultReg)
3370                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3371                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3372                                 .addReg(VR);
3373   // Insert the MADD
3374   InsInstrs.push_back(MIB);
3375   return MUL;
3376 }
3377
3378 /// When getMachineCombinerPatterns() finds potential patterns,
3379 /// this function generates the instructions that could replace the
3380 /// original code sequence
3381 void AArch64InstrInfo::genAlternativeCodeSequence(
3382     MachineInstr &Root, MachineCombinerPattern Pattern,
3383     SmallVectorImpl<MachineInstr *> &InsInstrs,
3384     SmallVectorImpl<MachineInstr *> &DelInstrs,
3385     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3386   MachineBasicBlock &MBB = *Root.getParent();
3387   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3388   MachineFunction &MF = *MBB.getParent();
3389   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3390
3391   MachineInstr *MUL;
3392   const TargetRegisterClass *RC;
3393   unsigned Opc;
3394   switch (Pattern) {
3395   default:
3396     // Reassociate instructions.
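    // Patterns not matched below (e.g. the generic reassociation patterns)
    // are forwarded to the target-independent implementation.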
3397 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 3398 DelInstrs, InstrIdxForVirtReg); 3399 return; 3400 case MachineCombinerPattern::MULADDW_OP1: 3401 case MachineCombinerPattern::MULADDX_OP1: 3402 // MUL I=A,B,0 3403 // ADD R,I,C 3404 // ==> MADD R,A,B,C 3405 // --- Create(MADD); 3406 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 3407 Opc = AArch64::MADDWrrr; 3408 RC = &AArch64::GPR32RegClass; 3409 } else { 3410 Opc = AArch64::MADDXrrr; 3411 RC = &AArch64::GPR64RegClass; 3412 } 3413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 3414 break; 3415 case MachineCombinerPattern::MULADDW_OP2: 3416 case MachineCombinerPattern::MULADDX_OP2: 3417 // MUL I=A,B,0 3418 // ADD R,C,I 3419 // ==> MADD R,A,B,C 3420 // --- Create(MADD); 3421 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 3422 Opc = AArch64::MADDWrrr; 3423 RC = &AArch64::GPR32RegClass; 3424 } else { 3425 Opc = AArch64::MADDXrrr; 3426 RC = &AArch64::GPR64RegClass; 3427 } 3428 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 3429 break; 3430 case MachineCombinerPattern::MULADDWI_OP1: 3431 case MachineCombinerPattern::MULADDXI_OP1: { 3432 // MUL I=A,B,0 3433 // ADD R,I,Imm 3434 // ==> ORR V, ZR, Imm 3435 // ==> MADD R,A,B,V 3436 // --- Create(MADD); 3437 const TargetRegisterClass *OrrRC; 3438 unsigned BitSize, OrrOpc, ZeroReg; 3439 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 3440 OrrOpc = AArch64::ORRWri; 3441 OrrRC = &AArch64::GPR32spRegClass; 3442 BitSize = 32; 3443 ZeroReg = AArch64::WZR; 3444 Opc = AArch64::MADDWrrr; 3445 RC = &AArch64::GPR32RegClass; 3446 } else { 3447 OrrOpc = AArch64::ORRXri; 3448 OrrRC = &AArch64::GPR64spRegClass; 3449 BitSize = 64; 3450 ZeroReg = AArch64::XZR; 3451 Opc = AArch64::MADDXrrr; 3452 RC = &AArch64::GPR64RegClass; 3453 } 3454 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 3455 uint64_t Imm = Root.getOperand(2).getImm(); 3456 3457 if (Root.getOperand(3).isImm()) { 3458 unsigned Val = Root.getOperand(3).getImm(); 3459 Imm = Imm << Val; 3460 } 3461 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); 3462 uint64_t Encoding; 3463 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 3464 MachineInstrBuilder MIB1 = 3465 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 3466 .addReg(ZeroReg) 3467 .addImm(Encoding); 3468 InsInstrs.push_back(MIB1); 3469 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 3470 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 3471 } 3472 break; 3473 } 3474 case MachineCombinerPattern::MULSUBW_OP1: 3475 case MachineCombinerPattern::MULSUBX_OP1: { 3476 // MUL I=A,B,0 3477 // SUB R,I, C 3478 // ==> SUB V, 0, C 3479 // ==> MADD R,A,B,V // = -C + A*B 3480 // --- Create(MADD); 3481 const TargetRegisterClass *SubRC; 3482 unsigned SubOpc, ZeroReg; 3483 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 3484 SubOpc = AArch64::SUBWrr; 3485 SubRC = &AArch64::GPR32spRegClass; 3486 ZeroReg = AArch64::WZR; 3487 Opc = AArch64::MADDWrrr; 3488 RC = &AArch64::GPR32RegClass; 3489 } else { 3490 SubOpc = AArch64::SUBXrr; 3491 SubRC = &AArch64::GPR64spRegClass; 3492 ZeroReg = AArch64::XZR; 3493 Opc = AArch64::MADDXrrr; 3494 RC = &AArch64::GPR64RegClass; 3495 } 3496 unsigned NewVR = MRI.createVirtualRegister(SubRC); 3497 // SUB NewVR, 0, C 3498 MachineInstrBuilder MIB1 = 3499 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 3500 .addReg(ZeroReg) 3501 .addOperand(Root.getOperand(2)); 3502 InsInstrs.push_back(MIB1); 3503 
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I,Imm
    // ==> ORR V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    int Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    // Cast before shifting: -Imm is a 32-bit int here, and shifting it left
    // by (64 - BitSize) directly would be undefined for BitSize == 32.
    uint64_t UImm = (uint64_t)(-Imm) << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion.
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  return;
}

/// \brief Replace a csinc-branch sequence by a simple conditional branch.
///
/// Examples:
/// 1.
///    csinc w9, wzr, wzr, <condition code>
///    tbnz w9, #0, 0x44
///    to
///    b.<inverted condition code>
///
/// 2.
///    csinc w9, wzr, wzr, <condition code>
///    tbz w9, #0, 0x44
///    to
///    b.<condition code>
///
/// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
///    and  w8, w8, #0x400
///    cbnz w8, L1
///    to
///    tbnz w8, #10, L1
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find the definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // The register lives on to the new TBZ/TBNZ now.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases; the 64-bit variant cannot encode them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC.
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}