//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI->getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI->getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

// Branch analysis.
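// Summary of the Cond encoding produced by parseCondBranch() above and
// consumed by the branch/select hooks below:
//   - Bcc:       Cond = { <condition code imm> }
//   - CBZ/CBNZ:  Cond = { -1, <branch opcode>, <reg> }
//   - TBZ/TBNZ:  Cond = { -1, <branch opcode>, <reg>, <bit number> }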
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::InsertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
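// The patterns recognized by the cases below are:
//   add  Rd, Rn, #1        -> csinc   (x + 1)
//   orn  Rd, ZR, Rm        -> csinv   (~x)
//   sub  Rd, ZR, Rm        -> csneg   (-x)
// plus their 64-bit and flag-setting variants when NZCV is otherwise unused.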
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = 0;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = 1;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = 0;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = 1;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
  uint64_t Imm = MI->getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
    return MI->isAsCheapAsAMove();

  switch (MI->getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI->getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;
  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
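    // Report it to the coalescer as a copy of the 32-bit source into the
    // sub_32 sub-register of the 64-bit destination (operands filled in
    // below).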
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int OffsetA = 0, OffsetB = 0;
  int WidthA = 0, WidthB = 0;

  assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = MI->getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = (MI->getOperand(2).getImm() != 0);
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = (AArch64_AM::decodeLogicalImmediate(
                    MI->getOperand(2).getImm(),
                    MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr *Instr) {
  MachineBasicBlock *MBB = Instr->getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr->getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr->getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI->getOpcode()) {
  default:
    return MI->getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

/// True when condition code could be modified on the instruction
/// trace starting at \p From and ending at \p To.
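/// The walk is performed backwards from \p To towards \p From; if the trace
/// would cross a basic block boundary, the function conservatively reports
/// that the condition code may be modified.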
static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
                                  const bool CheckOnlyCCWrites,
                                  const TargetRegisterInfo *TRI) {
  // We iterate backward starting from \p To until we hit \p From.
  MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();

  // Early exit if To is at the beginning of the BB.
  if (I == B)
    return true;

  // Check whether the definition of SrcReg is in the same basic block as
  // Compare. If not, assume the condition code gets modified on some path.
  if (To->getParent() != From->getParent())
    return true;

  // Check that NZCV isn't set on the trace.
  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
        (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
      // This instruction modifies or uses NZCV after the one we want to
      // change.
      return true;
    if (I == B)
      // We currently don't allow the instruction trace to cross basic
      // block boundaries.
      return true;
  }
  return false;
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (Cmp_NZCV != -1) {
    if (CmpInstr->definesRegister(AArch64::WZR) ||
        CmpInstr->definesRegister(AArch64::XZR)) {
      CmpInstr->eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr->getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr->setDesc(MCID);
    CmpInstr->RemoveOperand(Cmp_NZCV);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" form where the immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    return false;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  bool CheckOnlyCCWrites = false;
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
    return false;

  unsigned NewOpc = MI->getOpcode();
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    break;
  case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
  case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
  case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
  case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
  case AArch64::ADCWr:  NewOpc = AArch64::ADCSWr; break;
  case AArch64::ADCXr:  NewOpc = AArch64::ADCSXr; break;
  case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
  case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
  case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
  case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
  case AArch64::SBCWr:  NewOpc = AArch64::SBCSWr; break;
  case AArch64::SBCXr:  NewOpc = AArch64::SBCSXr; break;
  case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
  case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
  }

  // Scan forward for the use of NZCV.
  // When checking against MI: if a user requires a condition code that needs
  // the V (overflow) bit, then this transformation is not safe to do.
  // It is safe to remove CmpInstr if NZCV is redefined or killed.
  // If we are done with the basic block, we need to check whether NZCV is
  // live-out.
  bool IsSafe = false;
  for (MachineBasicBlock::iterator I = CmpInstr,
                                   E = CmpInstr->getParent()->end();
       !IsSafe && ++I != E;) {
    const MachineInstr &Instr = *I;
    for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
         ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
        IsSafe = true;
        break;
      }
      if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
        continue;
      if (MO.isDef()) {
        IsSafe = true;
        break;
      }

      // Decode the condition code.
      unsigned Opc = Instr.getOpcode();
      AArch64CC::CondCode CC;
      switch (Opc) {
      default:
        return false;
      case AArch64::Bcc:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
        break;
      case AArch64::CSINVWr:
      case AArch64::CSINVXr:
      case AArch64::CSINCWr:
      case AArch64::CSINCXr:
      case AArch64::CSELWr:
      case AArch64::CSELXr:
      case AArch64::CSNEGWr:
      case AArch64::CSNEGXr:
      case AArch64::FCSELSrrr:
      case AArch64::FCSELDrrr:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
        break;
      }

      // It is not safe to remove the Compare instruction if Overflow (V) is
      // used.
      switch (CC) {
      default:
        // NZCV can be used multiple times, we should continue.
        break;
      case AArch64CC::VS:
      case AArch64CC::VC:
      case AArch64CC::GE:
      case AArch64CC::LT:
      case AArch64CC::GT:
      case AArch64CC::LE:
        return false;
      }
    }
  }

  // If NZCV is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!IsSafe) {
    MachineBasicBlock *ParentBlock = CmpInstr->getParent();
    for (auto *MBB : ParentBlock->successors())
      if (MBB->isLiveIn(AArch64::NZCV))
        return false;
  }

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr->eraseFromParent();
  bool succeeded = UpdateOperandRegClass(MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Reg = MI->getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI->memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted-register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended-register operand with a
/// non-zero immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 3 &&
             MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI->getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI->getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI->getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI->getOperand(1).getReg() == AArch64::XZR) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI->getOperand(2).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
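// Examples are a plain COPY between FPR64/FPR128 registers and the NEON
// "orr Vd.16b, Vn.16b, Vn.16b" idiom matched below.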
bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
      assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
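/// For example, "ldr x0, [x1, x2, lsl #3]" scales its register offset and
/// "ldr x0, [x1, w2, sxtw]" extends it, whereas a plain "ldr x0, [x1, x2]"
/// (UXTX with no shift) does neither.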
bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI->getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  for (auto *MM : MI->memoperands()) {
    if (MM->getFlags() &
        (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
      return true;
    }
  }
  return false;
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
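/// The hint is stored in the target-specific flag bits of the memory operand;
/// isLdStPairSuppressed() above tests the same MOSuppressPair bit, shifted by
/// MachineMemOperand::MOTargetStartBit.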
void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
  if (MI->memoperands_empty())
    return;

  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  (*MI->memoperands_begin())
      ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
}

bool
AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                        unsigned &Offset,
                                        const TargetRegisterInfo *TRI) const {
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
      return false;
    BaseReg = LdSt->getOperand(1).getReg();
    MachineFunction &MF = *LdSt->getParent()->getParent();
    unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
    Offset = LdSt->getOperand(2).getImm() * Width;
    return true;
  };
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
    const TargetRegisterInfo *TRI) const {
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt->getNumOperands() != 3)
    return false;
  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
  int Scale = 0;
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  };

  BaseReg = LdSt->getOperand(1).getReg();
  Offset = LdSt->getOperand(2).getImm() * Scale;
  return true;
}

/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                          MachineInstr *SecondLdSt,
                                          unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;
  if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
    return false;
  // getMemOpBaseRegImmOfs guarantees that operand 2 is an immediate.
  unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
  // Allow 6 bits of positive range.
  if (Ofs1 > 64)
    return false;
  // The caller should already have ordered First/SecondLdSt by offset.
  unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
  return Ofs1 + 1 == Ofs2;
}

bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                              MachineInstr *Second) const {
  if (Subtarget.isCyclone()) {
    // Cyclone can fuse CMN, CMP, TST followed by Bcc.
    unsigned SecondOpcode = Second->getOpcode();
    if (SecondOpcode == AArch64::Bcc) {
      switch (First->getOpcode()) {
      default:
        return false;
      case AArch64::SUBSWri:
      case AArch64::ADDSWri:
      case AArch64::ANDSWri:
      case AArch64::SUBSXri:
      case AArch64::ADDSXri:
      case AArch64::ANDSXri:
        return true;
      }
    }
    // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
    if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
        SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
      switch (First->getOpcode()) {
      default:
        return false;
      case AArch64::ADDWri:
      case AArch64::ADDXri:
      case AArch64::ANDWri:
      case AArch64::ANDXri:
      case AArch64::EORWri:
      case AArch64::EORXri:
      case AArch64::ORRWri:
      case AArch64::ORRXri:
      case AArch64::SUBWri:
      case AArch64::SUBXri:
        return true;
      }
    }
  }
  return false;
}

MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
    MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
    const MDNode *Expr, DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(Var)
                                .addMetadata(Expr);
  return &*MIB;
}

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
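  // For example, with register encodings Dest=1, Src=0 and NumRegs=2, the
  // result is (1 - 0) & 0x1f == 1 < 2, i.e. a forward (low-to-high) copy
  // would overwrite the source's second sub-register before it is read, so
  // copyPhysRegTuple() below copies the tuple in reverse order instead.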
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}

void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    llvm::ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() &&
         "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2, AArch64::dsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2, AArch64::qsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
&AArch64::FPR32RegClass); 1777 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 1778 .addReg(SrcReg, getKillRegState(KillSrc)); 1779 } 1780 return; 1781 } 1782 1783 // Copies between GPR64 and FPR64. 1784 if (AArch64::FPR64RegClass.contains(DestReg) && 1785 AArch64::GPR64RegClass.contains(SrcReg)) { 1786 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 1787 .addReg(SrcReg, getKillRegState(KillSrc)); 1788 return; 1789 } 1790 if (AArch64::GPR64RegClass.contains(DestReg) && 1791 AArch64::FPR64RegClass.contains(SrcReg)) { 1792 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 1793 .addReg(SrcReg, getKillRegState(KillSrc)); 1794 return; 1795 } 1796 // Copies between GPR32 and FPR32. 1797 if (AArch64::FPR32RegClass.contains(DestReg) && 1798 AArch64::GPR32RegClass.contains(SrcReg)) { 1799 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 1800 .addReg(SrcReg, getKillRegState(KillSrc)); 1801 return; 1802 } 1803 if (AArch64::GPR32RegClass.contains(DestReg) && 1804 AArch64::FPR32RegClass.contains(SrcReg)) { 1805 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 1806 .addReg(SrcReg, getKillRegState(KillSrc)); 1807 return; 1808 } 1809 1810 if (DestReg == AArch64::NZCV) { 1811 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 1812 BuildMI(MBB, I, DL, get(AArch64::MSR)) 1813 .addImm(AArch64SysReg::NZCV) 1814 .addReg(SrcReg, getKillRegState(KillSrc)) 1815 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 1816 return; 1817 } 1818 1819 if (SrcReg == AArch64::NZCV) { 1820 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 1821 BuildMI(MBB, I, DL, get(AArch64::MRS)) 1822 .addReg(DestReg) 1823 .addImm(AArch64SysReg::NZCV) 1824 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 1825 return; 1826 } 1827 1828 llvm_unreachable("unimplemented reg-to-reg copy"); 1829 } 1830 1831 void AArch64InstrInfo::storeRegToStackSlot( 1832 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 1833 bool isKill, int FI, const TargetRegisterClass *RC, 1834 const TargetRegisterInfo *TRI) const { 1835 DebugLoc DL; 1836 if (MBBI != MBB.end()) 1837 DL = MBBI->getDebugLoc(); 1838 MachineFunction &MF = *MBB.getParent(); 1839 MachineFrameInfo &MFI = *MF.getFrameInfo(); 1840 unsigned Align = MFI.getObjectAlignment(FI); 1841 1842 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 1843 MachineMemOperand *MMO = MF.getMachineMemOperand( 1844 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 1845 unsigned Opc = 0; 1846 bool Offset = true; 1847 switch (RC->getSize()) { 1848 case 1: 1849 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 1850 Opc = AArch64::STRBui; 1851 break; 1852 case 2: 1853 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 1854 Opc = AArch64::STRHui; 1855 break; 1856 case 4: 1857 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 1858 Opc = AArch64::STRWui; 1859 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 1860 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 1861 else 1862 assert(SrcReg != AArch64::WSP); 1863 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 1864 Opc = AArch64::STRSui; 1865 break; 1866 case 8: 1867 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 1868 Opc = AArch64::STRXui; 1869 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 1870 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 1871 else 1872 assert(SrcReg != AArch64::SP); 1873 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 1874 Opc = AArch64::STRDui; 1875 break; 
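  // Spills of 16 bytes or more are either a single FPR128 store or a register
  // tuple store using an ST1 variant; the ST1 forms cannot encode an immediate
  // offset, so Offset is cleared for them below.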
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d, Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d, Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d, Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d, Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d, Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d, Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d, Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d, Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d, Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d, Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d, Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d, Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}

MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %vreg0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI->isCopy()) {
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned SrcReg = MI->getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Cannot fold.
  return nullptr;
}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
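  // (These are the ST1/LD1 opcodes chosen by storeRegToStackSlot and
  // loadRegFromStackSlot for register tuples above.)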
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

/// useMachineCombiner - return true when a target supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
  // AArch64 supports the MachineCombiner.
  return true;
}

//
// True when Opc sets flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

//
// 32b Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

//
// 64b Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

//
// Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;
  // We need a virtual register definition.
  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
    return false;

  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
         MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
         MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");

  // The third input reg must be zero.
  if (MI->getOperand(3).getReg() != ZeroReg)
    return false;

  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  return true;
}

/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in \p Root. All potential patterns are
/// listed in the \p Patterns vector. Patterns should be sorted in priority
/// order since the pattern evaluator stops checking as soon as it finds a
/// faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(&Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// genMadd - Generate madd instruction and combine mul and add.
/// Example:
///  MUL I=A,B,0
///  ADD R,I,C
///  ==> MADD R,A,B,C
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
                             const TargetInstrInfo *TII, MachineInstr &Root,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
                             unsigned IdxMulOpd, unsigned MaddOpc,
                             const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(SrcReg2, getKillRegState(Src2IsKill));
  // Insert the MADD.
  InsInstrs.push_back(MIB);
  return MUL;
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register.
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
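/// \param RC Register class used to constrain the result and source registers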
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD.
  InsInstrs.push_back(MIB);
  return MUL;
}

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Signal error.
    break;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    int Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion.
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  return;
}

/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1.
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
/// to
///   b.<inverted condition code>
///
/// 2.
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
/// to
///   b.<condition code>
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI->getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI->getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI->getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look for CSINC.
  if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
        DefMI->getOperand(1).getReg() == AArch64::WZR &&
        DefMI->getOperand(2).getReg() == AArch64::WZR) &&
      !(DefMI->getOpcode() == AArch64::CSINCXr &&
        DefMI->getOperand(1).getReg() == AArch64::XZR &&
        DefMI->getOperand(2).getReg() == AArch64::XZR))
    return false;

  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    return false;

  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
  bool CheckOnlyCCWrites = true;
  // Convert only when the condition code is not modified between
  // the CSINC and the branch. The CC may be used by other
  // instructions in between.
  if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
    return false;
  MachineBasicBlock &RefToMBB = *MBB;
  MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
  DebugLoc DL = MI->getDebugLoc();
  if (IsNegativeBranch)
    CC = AArch64CC::getInvertedCondCode(CC);
  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
  MI->eraseFromParent();
  return true;
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"},
      {MO_CONSTPOOL, "aarch64-constant-pool"}};
  return makeArrayRef(TargetFlags);
}