//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(this, &STI), Subtarget(STI) {}

/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI->getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI->getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

// Branch analysis.
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
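  // Walk backwards from the end of the block, skipping any trailing debug
  // values, to find the last real terminator.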
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now if the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
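  // (Returning true tells the caller that the terminators could not be
  // analyzed.)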
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    const SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::InsertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
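// Returns 0 when no foldable definition is found.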
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
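  // Only GPR and scalar FP classes are costed above; everything else is
  // rejected.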
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DstReg,
                                    const SmallVectorImpl<MachineOperand> &Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
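      // The old FalseReg then becomes the csel's true operand.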
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = MI->getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI->getOperand(2).getImm();
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI->getOperand(2).getImm(),
        MI->getOpcode() == AArch64::ANDSWri ? 32 : 64);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr *Instr) {
  MachineBasicBlock *MBB = Instr->getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetMachine *TM = &MF->getTarget();
  const TargetInstrInfo *TII = TM->getInstrInfo();
  const TargetRegisterInfo *TRI = TM->getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr->getOperand(OpIdx);
    const TargetRegisterClass *OpRegConstraints =
        Instr->getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegConstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegConstraints->contains(Reg))
        return false;
    } else if (!OpRegConstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegConstraints))
      return false;
  }

  return true;
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (Cmp_NZCV != -1) {
    unsigned NewOpc;
    switch (CmpInstr->getOpcode()) {
    default:
      return false;
    case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
    case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break;
    case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break;
    case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break;
    case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break;
    case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break;
    case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break;
    case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break;
    case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break;
    case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break;
    case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break;
    case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break;
    case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break;
    case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break;
    case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
    case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
    }

    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr->setDesc(MCID);
    CmpInstr->RemoveOperand(Cmp_NZCV);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands' register classes are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" form where the immediate is zero.
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if the destination register is not used.
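  // (If the result is used, the instruction computes a real value and cannot
  // be treated as a pure compare.)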
  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    return false;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  // We iterate backward, starting from the instruction before CmpInstr, and
  // stop when reaching the definition of the source register or when done with
  // the basic block, to check whether NZCV is used or modified in between.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B)
    return false;

  // Check whether the definition of SrcReg is in the same basic block as
  // Compare. If not, we can't optimize away the Compare.
  if (MI->getParent() != CmpInstr->getParent())
    return false;

  // Check that NZCV isn't set between the comparison instruction and the one
  // we want to change.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
        Instr.readsRegister(AArch64::NZCV, TRI))
      // This instruction modifies or uses NZCV after the one we want to
      // change. We can't do this transformation.
      return false;
    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  unsigned NewOpc = MI->getOpcode();
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    break;
  case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
  case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
  case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
  case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
  case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break;
  case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break;
  case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
  case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
  case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
  case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
  case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break;
  case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break;
  case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
  case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
  }

  // Scan forward for the use of NZCV.
  // When checking against MI: if the condition code requires checking of the
  // V bit, then it is not safe to do this transformation.
  // It is safe to remove CmpInstr if NZCV is redefined or killed.
  // If we are done with the basic block, we need to check whether NZCV is
  // live-out.
  bool IsSafe = false;
  for (MachineBasicBlock::iterator I = CmpInstr,
                                   E = CmpInstr->getParent()->end();
       !IsSafe && ++I != E;) {
    const MachineInstr &Instr = *I;
    for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
         ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
        IsSafe = true;
        break;
      }
      if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
        continue;
      if (MO.isDef()) {
        IsSafe = true;
        break;
      }

      // Decode the condition code.
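      // Any NZCV reader whose condition we cannot decode forces a
      // conservative bail-out below.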
      unsigned Opc = Instr.getOpcode();
      AArch64CC::CondCode CC;
      switch (Opc) {
      default:
        return false;
      case AArch64::Bcc:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
        break;
      case AArch64::CSINVWr:
      case AArch64::CSINVXr:
      case AArch64::CSINCWr:
      case AArch64::CSINCXr:
      case AArch64::CSELWr:
      case AArch64::CSELXr:
      case AArch64::CSNEGWr:
      case AArch64::CSNEGXr:
      case AArch64::FCSELSrrr:
      case AArch64::FCSELDrrr:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
        break;
      }

      // It is not safe to remove the Compare instruction if Overflow (V) is
      // used.
      switch (CC) {
      default:
        // NZCV can be used multiple times; we should continue.
        break;
      case AArch64CC::VS:
      case AArch64CC::VC:
      case AArch64CC::GE:
      case AArch64CC::LT:
      case AArch64CC::GT:
      case AArch64CC::LE:
        return false;
      }
    }
  }

  // If NZCV is neither killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!IsSafe) {
    MachineBasicBlock *ParentBlock = CmpInstr->getParent();
    for (auto *MBB : ParentBlock->successors())
      if (MBB->isLiveIn(AArch64::NZCV))
        return false;
  }

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr->eraseFromParent();
  bool succeeded = UpdateOperandRegClass(MI);
  (void)succeeded;
  assert(succeeded && "Some operands' register classes are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

/// Return true if this instruction has a shifted-register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended-register operand with a
/// non-zero immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 3 &&
             MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI->getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI->getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI->getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI->getOperand(1).getReg() == AArch64::XZR) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI->getOperand(2).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
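// The FPR cases mirror isGPRCopy: plain COPYs plus the self-ORR idiom that
// copies are lowered to.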
bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
      assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
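/// (e.g. an offset register used as [x0, x1, lsl #3] or [x0, w1, sxtw]).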
bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI->getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  for (auto *MM : MI->memoperands()) {
    if (MM->getFlags() &
        (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
      return true;
    }
  }
  return false;
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
  if (MI->memoperands_empty())
    return;

  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  (*MI->memoperands_begin())
      ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
}

bool
AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                       unsigned &Offset,
                                       const TargetRegisterInfo *TRI) const {
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
      return false;
    BaseReg = LdSt->getOperand(1).getReg();
    MachineFunction &MF = *LdSt->getParent()->getParent();
    unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
    Offset = LdSt->getOperand(2).getImm() * Width;
    return true;
  }
}

/// Detect opportunities for ldp/stp formation.
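/// Clustering keeps pairable memory operations adjacent in the schedule so a
/// later pass can actually merge them.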
///
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                          MachineInstr *SecondLdSt,
                                          unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;
  if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
    return false;
  // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate.
  unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
  // Allow 6 bits of positive range.
  if (Ofs1 > 64)
    return false;
  // The caller should already have ordered First/SecondLdSt by offset.
  unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
  return Ofs1 + 1 == Ofs2;
}

bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                              MachineInstr *Second) const {
  // Cyclone can fuse CMN, CMP followed by Bcc.

  // FIXME: B0 can also fuse:
  // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
  if (Second->getOpcode() != AArch64::Bcc)
    return false;
  switch (First->getOpcode()) {
  default:
    return false;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::ANDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
  case AArch64::ANDSXri:
    return true;
  }
}

MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                                         int FrameIx,
                                                         uint64_t Offset,
                                                         const MDNode *MDPtr,
                                                         DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(MDPtr);
  return &*MIB;
}

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here; that happens to be
  // easily obtainable with a mask.
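  // For example, with DestReg one register above SrcReg, a forward
  // sub-register-by-sub-register copy would overwrite part of the source
  // tuple before reading it, so the copy must be emitted in reverse.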
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}

void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    llvm::ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() &&
         "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
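        // The implicit SrcReg operand below is what carries the kill state
        // for the W register.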
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2, AArch64::dsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2, AArch64::qsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS))
        .addReg(DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}

void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
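  // From 16 bytes up, plain FPR128 spills use STR, while register tuples
  // must use ST1 and cannot take an immediate offset.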
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d, Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d, Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d, Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d, Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d, Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d, Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                      .addReg(SrcReg, getKillRegState(isKill))
                                      .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d, Offset = false;

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d, Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d, Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d, Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d, Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d, Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d, Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                      .addReg(DestReg, getDefRegState(true))
                                      .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24 bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP, which is always 16-byte aligned, so the
  // scratch register can be loaded with offset%8 and the add/sub can use an
  // extending instruction with LSL#3.
  // Currently the function handles any offset but may generate a poor
  // instruction sequence for large values.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
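
  // The loop below peels the offset into 12-bit chunks shifted by LSL #12.
  // As a worked example (register names hypothetical), Offset = 0x20C8 emits:
  //   ADD DestReg, SrcReg, #2, lsl #12    ; consumes 0x2000
  //   ADD DestReg, DestReg, #0xC8         ; the final BuildMI below the loop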
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}

MachineInstr *
AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                                        const SmallVectorImpl<unsigned> &Ops,
                                        int FrameIndex) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try to
  // fold the copy anyway.
  //
  // To prevent that, we constrain the register class of %vreg0 here, which
  // disqualifies the copy from being folded.
  //
  // <rdar://problem/11522048>
  //
  if (MI->isCopy()) {
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned SrcReg = MI->getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Cannot fold.
  return nullptr;
}
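
// Illustrative before/after for the constraint above (vreg number
// hypothetical):
//   before: %vreg0<def> = COPY %SP; GPR64all:%vreg0
//   after:  %vreg0<def> = COPY %SP; GPR64:%vreg0
// Once %vreg0 is constrained to GPR64, the register allocator can spill it
// like any other GPR instead of attempting to fold the stack-pointer copy.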

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }
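
  // Worked example (operand values hypothetical): for LDRXui, Scale is 8, so
  // a byte offset of 32768 scales to 4096, one past the 12-bit unsigned
  // maximum of 4095. The code below then emits immediate 4095 (byte offset
  // 32760) and reports the residual 8 bytes back through Offset. A byte
  // offset of 4 is misaligned for Scale 8, so useUnscaledOp switches the
  // instruction to LDURXi, whose signed 9-bit immediate covers -256..255.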

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::CreateImm(0));
}
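
// A minimal sketch of how a frame-index elimination hook could drive the two
// helpers above. The surrounding pass boilerplate is assumed, and ScratchReg,
// FIOperandNum, and AdjustmentFromFP are hypothetical names:
//
//   int Offset = MFI.getObjectOffset(FrameIndex) + AdjustmentFromFP;
//   if (!rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) {
//     // The offset could not be fully folded into MI; materialize the
//     // address into a scratch register and point the operand at it.
//     emitFrameOffset(MBB, MI, DL, ScratchReg, FrameReg, Offset, TII);
//     MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false);
//   }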